1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13// XXXAR: TODO fix default address space in this file as well
14#define getUnqual(arg) get(arg, 0u)
15
16#include "CGCXXABI.h"
17#include "CGCleanup.h"
18#include "CGOpenMPRuntime.h"
19#include "CGRecordLayout.h"
20#include "CodeGenFunction.h"
21#include "clang/CodeGen/ConstantInitBuilder.h"
22#include "clang/AST/Decl.h"
23#include "clang/AST/StmtOpenMP.h"
24#include "clang/Basic/BitmaskEnum.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/Bitcode/BitcodeReader.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/GlobalValue.h"
29#include "llvm/IR/Value.h"
30#include "llvm/Support/Format.h"
31#include "llvm/Support/raw_ostream.h"
32#include <cassert>
33
34using namespace clang;
35using namespace CodeGen;
36
37namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a region that captures the statement \p CS
  /// (used by the outlined-region subclasses).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement (used for regions
  /// emitted inline into the enclosing function).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a resumption point for an untied task; no-op in the base class,
  /// overridden by region kinds that support untied task switching.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region (see CGOpenMPRegionKind).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive that introduced this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns the HasCancel flag supplied at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any captured-stmt info created with CR_OpenMP is one
  /// of the CGOpenMPRegionInfo subclasses.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Which CGOpenMPRegionKind variant this region is.
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence for the region body.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that introduced the region.
  OpenMPDirectiveKind Kind;
  /// Flag supplied at construction; queried via hasCancel().
  bool HasCancel;
};
99
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: matches only parallel outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  /// NOTE(review): stored as a StringRef — assumes the caller keeps the
  /// underlying string alive for this object's lifetime; confirm at call
  /// sites.
  StringRef HelperName;
};
132
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing switch-based resumption for untied
  /// tasks: a switch on the task part id is emitted at task entry, and
  /// each suspension point registers its continuation block as a new case
  /// so a re-invocation jumps to where the task previously stopped.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the current part id of the task.
    const VarDecl *PartIDVar;
    /// Extra codegen sequence run at each suspension point.
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Switch on the current part id; the default destination simply
        // returns from the task through active cleanups.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 starts execution at the beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one suspension point: store the next part id, run the extra
    /// codegen sequence, return from the task, and register the
    /// continuation block as a new case of the entry switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Point the part id at the continuation case added below.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts generated so far (one per switch case).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate suspension-point emission to the untied task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: matches only task outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
221
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing (outer) OpenMP
/// region info, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE(review): unlike the other delegating methods, this one goes
  /// through getOldCSI() (the raw previous captured-stmt info) rather than
  /// the OuterRegionInfo member, so it also works when the previous info
  /// is not an OpenMP region.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Captured-stmt info that was active before this inlined region was
  /// entered; restored by InlinedOpenMPRegionRAII on exit.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI: matches only inlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
304
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the
/// client has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: matches only target regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name for the target region helper.
  StringRef HelperName;
};
333
/// Placeholder RegionCodeGenTy callback for regions that must never emit a
/// body (used by CGOpenMPInnerExprInfo below); always unreachable.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Locals and parameters are already function-local; only non-local
      // declarations need to be re-addressed through the capture.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the captured variable and register its
      // address as the private copy.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  /// Delegates to the enclosing inlined region; returns null when the
  /// variable is not captured there.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// This info is never matched via RTTI.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
396
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state of CGF; restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct. The new info chains to the previous
    // CapturedStmtInfo and is deleted in the destructor.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash and clear lambda/block capture state for the duration of the
    // region; everything is restored on destruction.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
433
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  /// Enables bitwise operators (|, &, ^, ~) on this enum type.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
462
namespace {
// Enables bitwise operators for bitmask enums declared in this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  /// Enables bitwise operators (|, &, ^, ~) on this enum type.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
} // anonymous namespace
482
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
523
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Schedule used when none is specified explicitly (static).
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
555
/// Identifiers for the OpenMP runtime entry points (libomp and, below the
/// "Offloading related calls" divider, libomptarget) that codegen may emit
/// calls to. Each enumerator's comment documents the C signature of the
/// corresponding runtime function.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
748
749/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
750/// region.
751class CleanupTy final : public EHScopeStack::Cleanup {
752 PrePostActionTy *Action;
753
754public:
755 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
756 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
757 if (!CGF.HaveInsertPoint())
758 return;
759 Action->Exit(CGF);
760 }
761};
762
763} // anonymous namespace
764
765void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
766 CodeGenFunction::RunCleanupsScope Scope(CGF);
767 if (PrePostAction) {
768 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
769 Callback(CodeGen, CGF, *PrePostAction);
770 } else {
771 PrePostActionTy Action;
772 Callback(CodeGen, CGF, Action);
773 }
774}
775
776/// Check if the combiner is a call to UDR combiner and if it is so return the
777/// UDR decl used for reduction.
778static const OMPDeclareReductionDecl *
779getReductionInit(const Expr *ReductionOp) {
780 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
781 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
782 if (const auto *DRE =
783 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
784 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
785 return DRD;
786 return nullptr;
787}
788
/// Emit initialization of a private reduction copy.
/// If \p DRD provides an explicit initializer, emits a call to the
/// user-defined initializer with the call's first argument mapped to
/// \p Private and its second to \p Original; otherwise emits
/// zero-initialization of \p Private.
/// \param DRD Declare-reduction declaration driving the initialization.
/// \param InitOp Initializer expression of the UDR (a call expression).
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    // Dig out the variables referenced by the initializer call's arguments
    // so they can be privatized below.
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the first argument's variable to the private copy and the
    // second's to the original variable for the duration of the call.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the function from getUserDefinedReduction for the opaque
    // callee (presumably the initializer half of the pair — confirm in
    // getUserDefinedReduction) and emit the call for its side effects.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No user-provided initializer: materialize the type's null value in a
    // private constant global and copy it into the private variable.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the null value in the form matching Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Store the loaded value into Private via a temporary opaque expression.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
840
/// Emit element-by-element initialization of an array of complex (non-scalar)
/// types using an IR while-do loop over the elements.
/// \param DestAddr Address of the array being initialized.
/// \param Type Type of the array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
/// user-defined reduction initializer; otherwise emit \p Init directly.
/// \param Init Initial expression for each element.
/// \param DRD User-defined reduction declaration, if any.
/// \param SrcAddr Address of the original array (only used when \p DRD is
/// non-null, since the UDR initializer may read the original element).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Compute the past-the-end pointer for the loop bound.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointer around the
  // loop; the source PHI exists only for UDR initialization.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the per-element cleanups so they run after each element init.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge comes from the current block (which may differ from BodyBB
  // after the element init emitted control flow).
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
929
// Emit the lvalue for the shared (original) reduction expression \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
933
934LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
935 const Expr *E) {
936 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
937 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
938 return LValue();
939}
940
941void ReductionCodeGen::emitAggregateInitialization(
942 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
943 const OMPDeclareReductionDecl *DRD) {
944 // Emit VarDecl with copy init for arrays.
945 // Get the address of the original variable captured in current
946 // captured region.
947 const auto *PrivateVD =
948 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
949 bool EmitDeclareReductionInit =
950 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
951 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
952 EmitDeclareReductionInit,
953 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
954 : PrivateVD->getInit(),
955 DRD, SharedLVal.getAddress());
956}
957
958ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
959 ArrayRef<const Expr *> Privates,
960 ArrayRef<const Expr *> ReductionOps) {
961 ClausesData.reserve(Shareds.size());
962 SharedAddresses.reserve(Shareds.size());
963 Sizes.reserve(Shareds.size());
964 BaseDecls.reserve(Shareds.size());
965 auto IPriv = Privates.begin();
966 auto IRed = ReductionOps.begin();
967 for (const Expr *Ref : Shareds) {
968 ClausesData.emplace_back(Ref, *IPriv, *IRed);
969 std::advance(IPriv, 1);
970 std::advance(IRed, 1);
971 }
972}
973
974void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
975 assert(SharedAddresses.size() == N &&
976 "Number of generated lvalues must be exactly N.");
977 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
978 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
979 SharedAddresses.emplace_back(First, Second);
980}
981
/// Compute and record the size of reduction item \p N. Fixed-size items get
/// a constant byte size and a null element count; variably modified items
/// (array sections / VLAs) get both a byte size and an element count, and the
/// VLA size expression is bound so the private type can be re-emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: byte size of the shared type, no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count is (UB - LB) + 1; byte size is count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: the byte size is directly computable; derive the element count by
    // exact division.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so the
  // variably modified private type can be emitted with the right extent.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1019
/// Re-emit the variably modified private type of reduction item \p N using a
/// caller-provided element count \p Size (e.g. when the size was computed in
/// a different function). For non-variably-modified items \p Size must be
/// null and nothing is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to Size, then emit the private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1038
/// Emit initialization of the private copy of reduction item \p N.
/// Dispatches between: array initialization (element-by-element), UDR
/// initializer, or the private variable's own non-trivial initializer when
/// \p DefaultInit declines to handle it.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory representation of their types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar/aggregate item with a user-defined reduction initializer.
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own initializer if the default
    // initialization callback did not handle it and the init is non-trivial.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1069
1070bool ReductionCodeGen::needCleanups(unsigned N) {
1071 const auto *PrivateVD =
1072 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1073 QualType PrivateType = PrivateVD->getType();
1074 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1075 return DTorKind != QualType::DK_none;
1076}
1077
1078void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1079 Address PrivateAddr) {
1080 const auto *PrivateVD =
1081 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1082 QualType PrivateType = PrivateVD->getType();
1083 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1084 if (needCleanups(N)) {
1085 PrivateAddr = CGF.Builder.CreateElementBitCast(
1086 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1087 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1088 }
1089}
1090
/// Starting from \p BaseLV, load through each level of pointer/reference
/// indirection in \p BaseTy until the type matches \p ElTy (or indirection
/// runs out), then return the reached address recast to the memory
/// representation of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      // Reference: load it as a reference lvalue.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Preserve the base info and TBAA of the final lvalue across the recast.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1110
/// Rebuild an address with the same indirection shape as the original base.
/// For each pointer/reference level between \p BaseTy and \p ElTy a stack
/// temporary is created; the temporaries are chained by stores so that
/// loading through the outermost one (MostTopTmp) eventually reaches
/// \p Addr. With no indirection, \p Addr is simply cast to \p BaseLVType.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link the previous level to the new temporary.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      // Remember the outermost temporary; it is the returned address.
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1138
1139static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1140 const VarDecl *OrigVD = nullptr;
1141 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1142 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1143 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1144 Base = TempOASE->getBase()->IgnoreParenImpCasts();
1145 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1146 Base = TempASE->getBase()->IgnoreParenImpCasts();
1147 DE = cast<DeclRefExpr>(Base);
1148 OrigVD = cast<VarDecl>(DE->getDecl());
1149 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1150 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1151 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1152 Base = TempASE->getBase()->IgnoreParenImpCasts();
1153 DE = cast<DeclRefExpr>(Base);
1154 OrigVD = cast<VarDecl>(DE->getDecl());
1155 }
1156 return OrigVD;
1157}
1158
/// Adjust the private address of reduction item \p N when the reduction
/// expression is an array section or subscript: the private copy covers only
/// the selected slice, so the returned address is rebased such that indexing
/// it as the original base works (private base = private address +
/// (original base - shared section begin)). Records the base VarDecl either
/// way.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Dereference the base down to the element type of the section.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the base from the section's first element.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    // Apply the same offset to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Rebuild the indirection shape of the original base around the result.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1184
1185bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1186 const OMPDeclareReductionDecl *DRD =
1187 getReductionInit(ClausesData[N].ReductionOp);
1188 return DRD && DRD->getInitializer();
1189}
1190
1191LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1192 return CGF.EmitLoadOfPointerLValue(
1193 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1194 getThreadIDVariable()->getType()->castAs<PointerType>());
1195}
1196
1197void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1198 if (!CGF.HaveInsertPoint())
1199 return;
1200 // 1.2.2 OpenMP Language Terminology
1201 // Structured block - An executable statement with a single entry at the
1202 // top and a single exit at the bottom.
1203 // The point of exit cannot be a branch out of the structured block.
1204 // longjmp() and throw() must not violate the entry/exit criteria.
1205 CGF.EHStack.pushTerminate();
1206 CodeGen(CGF);
1207 CGF.EHStack.popTerminate();
1208}
1209
1210LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1211 CodeGenFunction &CGF) {
1212 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1213 getThreadIDVariable()->getType(),
1214 AlignmentSource::Decl);
1215}
1216
1217static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1218 QualType FieldTy) {
1219 auto *Field = FieldDecl::Create(
1220 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1221 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1222 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1223 Field->setAccess(AS_public);
1224 DC->addDecl(Field);
1225 return Field;
1226}
1227
1228CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1229 StringRef Separator)
1230 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1231 OffloadEntriesInfoManager(CGM) {
1232 ASTContext &C = CGM.getContext();
1233 RecordDecl *RD = C.buildImplicitRecord("ident_t");
1234 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1235 RD->startDefinition();
1236 // reserved_1
1237 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1238 // flags
1239 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1240 // reserved_2
1241 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1242 // reserved_3
1243 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1244 // psource
1245 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1246 RD->completeDefinition();
1247 IdentQTy = C.getRecordType(RD);
1248 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1249 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1250
1251 loadOffloadInfoMetadata();
1252}
1253
1254void CGOpenMPRuntime::clear() {
1255 InternalVars.clear();
1256 // Clean non-target variable declarations possibly used only in debug info.
1257 for (const auto &Data : EmittedNonTargetVariables) {
1258 if (!Data.getValue().pointsToAliveValue())
1259 continue;
1260 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1261 if (!GV)
1262 continue;
1263 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1264 continue;
1265 GV->eraseFromParent();
1266 }
1267}
1268
1269std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1270 SmallString<128> Buffer;
1271 llvm::raw_svector_ostream OS(Buffer);
1272 StringRef Sep = FirstSeparator;
1273 for (StringRef Part : Parts) {
1274 OS << Sep << Part;
1275 Sep = Separator;
1276 }
1277 return OS.str();
1278}
1279
/// Emit the outlined helper function for a user-defined reduction combiner or
/// initializer. The helper has the shape
///   void <name>(Ty *omp_out_parm, Ty *omp_in_parm);
/// and the \p In / \p Out variables of the UDR are privatized to the pointees
/// of the two parameters so the combiner/initializer expression refers to the
/// right storage.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // In optimized builds, force these small helpers to be inlined.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For an initializer declared without a call-style expression, emit Out's
  // own (non-trivial) initializer into its privatized storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1336
/// Emit, at most once per declaration, the combiner and (optional)
/// initializer functions for user-defined reduction \p D and cache the pair
/// in UDRMap. When emitted inside a function, \p D is also recorded against
/// CGF->CurFn in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // A call-style initializer is passed to the helper as an expression;
    // direct initialization is handled inside emitCombinerOrInitializer via
    // the priv variable's own initializer (nullptr passed here).
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1362
1363std::pair<llvm::Function *, llvm::Function *>
1364CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1365 auto I = UDRMap.find(D);
1366 if (I != UDRMap.end())
1367 return I->second;
1368 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1369 return UDRMap.lookup(D);
1370}
1371
/// Outline the captured statement of a 'parallel' or 'teams' region into a
/// function named via \p OutlinedHelperName. Whether the region can be
/// cancelled is derived from the concrete directive kind (each parallel-like
/// directive exposes hasCancel()).
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine cancellation support by checking every directive kind that
  // contains a cancellable parallel region.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  // Install region info for the captured-statement codegen and outline.
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
1401
1402llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1403 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1404 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1405 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1406 return emitParallelOrTeamsOutlinedFunction(
1407 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1408}
1409
1410llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1411 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1412 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1413 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1414 return emitParallelOrTeamsOutlinedFunction(
1415 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1416}
1417
/// Outline the body of a 'task' or 'taskloop' region. For untied tasks an
/// action is installed that re-schedules the task (via __kmpc_omp_task) at
/// task switch points; the number of generated task parts is reported back
/// through \p NumberOfParts in that case.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen for an untied-task switch point: call
  // __kmpc_omp_task(loc, tid, task_t) with the current task descriptor.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only plain 'task' directives can carry a 'cancel'.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    // Untied tasks are split into parts; report how many were generated.
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1454
1455static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1456 const RecordDecl *RD, const CGRecordLayout &RL,
1457 ArrayRef<llvm::Constant *> Data) {
1458 llvm::StructType *StructTy = RL.getLLVMType();
1459 unsigned PrevIdx = 0;
1460 ConstantInitBuilder CIBuilder(CGM);
1461 auto DI = Data.begin();
1462 for (const FieldDecl *FD : RD->fields()) {
1463 unsigned Idx = RL.getLLVMFieldNo(FD);
1464 // Fill the alignment.
1465 for (unsigned I = PrevIdx; I < Idx; ++I)
1466 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1467 PrevIdx = Idx + 1;
1468 Fields.add(*DI);
1469 ++DI;
1470 }
1471}
1472
1473template <class... As>
1474static llvm::GlobalVariable *
1475createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1476 ArrayRef<llvm::Constant *> Data, const Twine &Name,
1477 As &&... Args) {
1478 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1479 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1480 ConstantInitBuilder CIBuilder(CGM);
1481 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1482 buildStructValue(Fields, CGM, RD, RL, Data);
1483 return Fields.finishAndCreateGlobal(
1484 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1485 std::forward<As>(Args)...);
1486}
1487
1488template <typename T>
1489static void
1490createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1491 ArrayRef<llvm::Constant *> Data,
1492 T &Parent) {
1493 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1494 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1495 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1496 buildStructValue(Fields, CGM, RD, RL, Data);
1497 Fields.finishAndAddTo(Parent);
1498}
1499
/// Return (creating and caching on first use) the default ident_t global for
/// the given flags. Cached per (Flags, Reserved2Flags) pair in
/// OpenMPDefaultLocMap; the psource field points to the shared
/// ";unknown;unknown;0;0;;" string.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order matches ident_t: reserved_1, flags, reserved_2, reserved_3,
    // psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1532
/// Create the service insertion point for the current function: a dead
/// bitcast-of-undef instruction used as an anchor for emitting location /
/// thread-id service instructions. It is placed either at the current
/// insertion point (\p AtCurrentPoint) or right after the alloca insertion
/// point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1548
1549void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1550 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1551 if (Elem.second.ServiceInsertPt) {
1552 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1553 Elem.second.ServiceInsertPt = nullptr;
1554 Ptr->eraseFromParent();
1555 }
1556}
1557
/// Emit (or reuse) the ident_t* source-location argument that every libomp
/// entry point takes. Without debug info (or with an invalid Loc) this is a
/// shared module-level default location; otherwise a per-function stack copy
/// of the default ident_t is created once and its psource field is updated to
/// a ";<file>;<function>;<line>;<column>;;" string for this call site.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  // Reuse the per-function ident_t temporary if one was already emitted.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary from the default ident_t at the service
    // insertion point so the copy dominates every use in the function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Look up (or build and cache) the ";file;function;line;column;;" string
  // global for this exact source location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1618
/// Return the OpenMP global thread id for the current function, caching it
/// per-function. Prefers (1) a previously cached value, then (2) the thread-id
/// parameter of an enclosing outlined region, and only as a last resort
/// (3) emits a call to __kmpc_global_thread_num at the service insert point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  // (Reading the region's thread-id variable is only done when we are in the
  // entry block or when no C++ EH landing pad is required.)
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insert point so the value dominates
  // all uses; the guard restores the builder position afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1669
1670void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1671 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1672 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1673 clearLocThreadIdInsertPt(CGF);
1674 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1675 }
1676 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1677 for(auto *D : FunctionUDRMap[CGF.CurFn])
1678 UDRMap.erase(D);
1679 FunctionUDRMap.erase(CGF.CurFn);
1680 }
1681}
1682
/// Return the LLVM type for a pointer to the ident_t struct.
// NOTE(review): getPointerTo() uses its default address space here; this
// file's top-of-file TODO ("fix default address space in this file") suggests
// that default may need revisiting on targets with non-zero default AS.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}
1686
1687llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1688 if (!Kmpc_MicroTy) {
1689 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1690 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1691 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1692 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1693 }
1694 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1695}
1696
1697llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1698 llvm::FunctionCallee RTLFn = nullptr;
1699 unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
1700 switch (static_cast<OpenMPRTLFunction>(Function)) {
1701 case OMPRTL__kmpc_fork_call: {
1702 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1703 // microtask, ...);
1704 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1705 getKmpc_MicroPointerTy()};
1706 auto *FnTy =
1707 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1708 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1709 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1710 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1711 llvm::LLVMContext &Ctx = F->getContext();
1712 llvm::MDBuilder MDB(Ctx);
1713 // Annotate the callback behavior of the __kmpc_fork_call:
1714 // - The callback callee is argument number 2 (microtask).
1715 // - The first two arguments of the callback callee are unknown (-1).
1716 // - All variadic arguments to the __kmpc_fork_call are passed to the
1717 // callback callee.
1718 F->addMetadata(
1719 llvm::LLVMContext::MD_callback,
1720 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1721 2, {-1, -1},
1722 /* VarArgsArePassed */ true)}));
1723 }
1724 }
1725 break;
1726 }
1727 case OMPRTL__kmpc_global_thread_num: {
1728 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1729 llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1730 auto *FnTy =
1731 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1732 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1733 break;
1734 }
1735 case OMPRTL__kmpc_threadprivate_cached: {
1736 // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1737 // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1738 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1739 CGM.VoidPtrTy, CGM.SizeTy,
1740 CGM.VoidPtrTy->getPointerTo(DefaultAS)->getPointerTo(DefaultAS)};
1741 auto *FnTy =
1742 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1743 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1744 break;
1745 }
1746 case OMPRTL__kmpc_critical: {
1747 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1748 // kmp_critical_name *crit);
1749 llvm::Type *TypeParams[] = {
1750 getIdentTyPointerTy(), CGM.Int32Ty,
1751 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1752 auto *FnTy =
1753 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1754 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1755 break;
1756 }
1757 case OMPRTL__kmpc_critical_with_hint: {
1758 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1759 // kmp_critical_name *crit, uintptr_t hint);
1760 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1761 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1762 CGM.IntPtrTy};
1763 auto *FnTy =
1764 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1765 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1766 break;
1767 }
1768 case OMPRTL__kmpc_threadprivate_register: {
1769 // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1770 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1771 // typedef void *(*kmpc_ctor)(void *);
1772 auto *KmpcCtorTy =
1773 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1774 /*isVarArg*/ false)->getPointerTo(DefaultAS);
1775 // typedef void *(*kmpc_cctor)(void *, void *);
1776 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1777 auto *KmpcCopyCtorTy =
1778 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1779 /*isVarArg*/ false)->getPointerTo(DefaultAS);
1780 // typedef void (*kmpc_dtor)(void *);
1781 auto *KmpcDtorTy =
1782 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1783 ->getPointerTo(DefaultAS);
1784 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1785 KmpcCopyCtorTy, KmpcDtorTy};
1786 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1787 /*isVarArg*/ false);
1788 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1789 break;
1790 }
1791 case OMPRTL__kmpc_end_critical: {
1792 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1793 // kmp_critical_name *crit);
1794 llvm::Type *TypeParams[] = {
1795 getIdentTyPointerTy(), CGM.Int32Ty,
1796 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1797 auto *FnTy =
1798 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1799 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1800 break;
1801 }
1802 case OMPRTL__kmpc_cancel_barrier: {
1803 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1804 // global_tid);
1805 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1806 auto *FnTy =
1807 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1808 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1809 break;
1810 }
1811 case OMPRTL__kmpc_barrier: {
1812 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1813 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1814 auto *FnTy =
1815 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1816 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1817 break;
1818 }
1819 case OMPRTL__kmpc_for_static_fini: {
1820 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1821 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1822 auto *FnTy =
1823 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1824 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1825 break;
1826 }
1827 case OMPRTL__kmpc_push_num_threads: {
1828 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1829 // kmp_int32 num_threads)
1830 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1831 CGM.Int32Ty};
1832 auto *FnTy =
1833 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1834 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1835 break;
1836 }
1837 case OMPRTL__kmpc_serialized_parallel: {
1838 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1839 // global_tid);
1840 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1841 auto *FnTy =
1842 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1843 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1844 break;
1845 }
1846 case OMPRTL__kmpc_end_serialized_parallel: {
1847 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1848 // global_tid);
1849 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1850 auto *FnTy =
1851 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1852 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1853 break;
1854 }
1855 case OMPRTL__kmpc_flush: {
1856 // Build void __kmpc_flush(ident_t *loc);
1857 llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1858 auto *FnTy =
1859 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1860 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1861 break;
1862 }
1863 case OMPRTL__kmpc_master: {
1864 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1865 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1866 auto *FnTy =
1867 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1868 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1869 break;
1870 }
1871 case OMPRTL__kmpc_end_master: {
1872 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1873 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1874 auto *FnTy =
1875 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1876 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1877 break;
1878 }
1879 case OMPRTL__kmpc_omp_taskyield: {
1880 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1881 // int end_part);
1882 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1883 auto *FnTy =
1884 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1885 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1886 break;
1887 }
1888 case OMPRTL__kmpc_single: {
1889 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1890 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1891 auto *FnTy =
1892 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1893 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1894 break;
1895 }
1896 case OMPRTL__kmpc_end_single: {
1897 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1898 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1899 auto *FnTy =
1900 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1901 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1902 break;
1903 }
1904 case OMPRTL__kmpc_omp_task_alloc: {
1905 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1906 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1907 // kmp_routine_entry_t *task_entry);
1908 assert(KmpRoutineEntryPtrTy != nullptr &&
1909 "Type kmp_routine_entry_t must be created.");
1910 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1911 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1912 // Return void * and then cast to particular kmp_task_t type.
1913 auto *FnTy =
1914 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1915 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1916 break;
1917 }
1918 case OMPRTL__kmpc_omp_task: {
1919 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1920 // *new_task);
1921 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1922 CGM.VoidPtrTy};
1923 auto *FnTy =
1924 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1925 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1926 break;
1927 }
1928 case OMPRTL__kmpc_copyprivate: {
1929 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1930 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1931 // kmp_int32 didit);
1932 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1933 auto *CpyFnTy =
1934 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1935 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1936 CGM.VoidPtrTy, CpyFnTy->getPointerTo(DefaultAS),
1937 CGM.Int32Ty};
1938 auto *FnTy =
1939 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1940 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1941 break;
1942 }
1943 case OMPRTL__kmpc_reduce: {
1944 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1945 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1946 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1947 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1948 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1949 /*isVarArg=*/false);
1950 llvm::Type *TypeParams[] = {
1951 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1952 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(DefaultAS),
1953 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1954 auto *FnTy =
1955 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1956 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1957 break;
1958 }
1959 case OMPRTL__kmpc_reduce_nowait: {
1960 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1961 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1962 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1963 // *lck);
1964 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1965 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1966 /*isVarArg=*/false);
1967 llvm::Type *TypeParams[] = {
1968 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1969 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(DefaultAS),
1970 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1971 auto *FnTy =
1972 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1973 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1974 break;
1975 }
1976 case OMPRTL__kmpc_end_reduce: {
1977 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1978 // kmp_critical_name *lck);
1979 llvm::Type *TypeParams[] = {
1980 getIdentTyPointerTy(), CGM.Int32Ty,
1981 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1982 auto *FnTy =
1983 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1984 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1985 break;
1986 }
1987 case OMPRTL__kmpc_end_reduce_nowait: {
1988 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1989 // kmp_critical_name *lck);
1990 llvm::Type *TypeParams[] = {
1991 getIdentTyPointerTy(), CGM.Int32Ty,
1992 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1993 auto *FnTy =
1994 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1995 RTLFn =
1996 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1997 break;
1998 }
1999 case OMPRTL__kmpc_omp_task_begin_if0: {
2000 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2001 // *new_task);
2002 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2003 CGM.VoidPtrTy};
2004 auto *FnTy =
2005 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2006 RTLFn =
2007 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2008 break;
2009 }
2010 case OMPRTL__kmpc_omp_task_complete_if0: {
2011 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2012 // *new_task);
2013 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2014 CGM.VoidPtrTy};
2015 auto *FnTy =
2016 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2017 RTLFn = CGM.CreateRuntimeFunction(FnTy,
2018 /*Name=*/"__kmpc_omp_task_complete_if0");
2019 break;
2020 }
2021 case OMPRTL__kmpc_ordered: {
2022 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2023 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2024 auto *FnTy =
2025 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2026 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2027 break;
2028 }
2029 case OMPRTL__kmpc_end_ordered: {
2030 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2031 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2032 auto *FnTy =
2033 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2034 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2035 break;
2036 }
2037 case OMPRTL__kmpc_omp_taskwait: {
2038 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2039 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2040 auto *FnTy =
2041 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2042 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2043 break;
2044 }
2045 case OMPRTL__kmpc_taskgroup: {
2046 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2047 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2048 auto *FnTy =
2049 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2050 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2051 break;
2052 }
2053 case OMPRTL__kmpc_end_taskgroup: {
2054 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2055 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2056 auto *FnTy =
2057 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2058 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2059 break;
2060 }
2061 case OMPRTL__kmpc_push_proc_bind: {
2062 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2063 // int proc_bind)
2064 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2065 auto *FnTy =
2066 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2067 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2068 break;
2069 }
2070 case OMPRTL__kmpc_omp_task_with_deps: {
2071 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2072 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2073 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2074 llvm::Type *TypeParams[] = {
2075 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2076 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
2077 auto *FnTy =
2078 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2079 RTLFn =
2080 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2081 break;
2082 }
2083 case OMPRTL__kmpc_omp_wait_deps: {
2084 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2085 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2086 // kmp_depend_info_t *noalias_dep_list);
2087 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2088 CGM.Int32Ty, CGM.VoidPtrTy,
2089 CGM.Int32Ty, CGM.VoidPtrTy};
2090 auto *FnTy =
2091 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2092 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2093 break;
2094 }
2095 case OMPRTL__kmpc_cancellationpoint: {
2096 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2097 // global_tid, kmp_int32 cncl_kind)
2098 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2099 auto *FnTy =
2100 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2101 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2102 break;
2103 }
2104 case OMPRTL__kmpc_cancel: {
2105 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2106 // kmp_int32 cncl_kind)
2107 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2108 auto *FnTy =
2109 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2110 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2111 break;
2112 }
2113 case OMPRTL__kmpc_push_num_teams: {
2114 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2115 // kmp_int32 num_teams, kmp_int32 num_threads)
2116 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2117 CGM.Int32Ty};
2118 auto *FnTy =
2119 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2120 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2121 break;
2122 }
2123 case OMPRTL__kmpc_fork_teams: {
2124 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2125 // microtask, ...);
2126 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2127 getKmpc_MicroPointerTy()};
2128 auto *FnTy =
2129 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2130 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2131 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2132 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2133 llvm::LLVMContext &Ctx = F->getContext();
2134 llvm::MDBuilder MDB(Ctx);
2135 // Annotate the callback behavior of the __kmpc_fork_teams:
2136 // - The callback callee is argument number 2 (microtask).
2137 // - The first two arguments of the callback callee are unknown (-1).
2138 // - All variadic arguments to the __kmpc_fork_teams are passed to the
2139 // callback callee.
2140 F->addMetadata(
2141 llvm::LLVMContext::MD_callback,
2142 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2143 2, {-1, -1},
2144 /* VarArgsArePassed */ true)}));
2145 }
2146 }
2147 break;
2148 }
2149 case OMPRTL__kmpc_taskloop: {
2150 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2151 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2152 // sched, kmp_uint64 grainsize, void *task_dup);
2153 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2154 CGM.IntTy,
2155 CGM.VoidPtrTy,
2156 CGM.IntTy,
2157 CGM.Int64Ty->getPointerTo(DefaultAS),
2158 CGM.Int64Ty->getPointerTo(DefaultAS),
2159 CGM.Int64Ty,
2160 CGM.IntTy,
2161 CGM.IntTy,
2162 CGM.Int64Ty,
2163 CGM.VoidPtrTy};
2164 auto *FnTy =
2165 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2166 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2167 break;
2168 }
2169 case OMPRTL__kmpc_doacross_init: {
2170 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2171 // num_dims, struct kmp_dim *dims);
2172 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2173 CGM.Int32Ty,
2174 CGM.Int32Ty,
2175 CGM.VoidPtrTy};
2176 auto *FnTy =
2177 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2178 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2179 break;
2180 }
2181 case OMPRTL__kmpc_doacross_fini: {
2182 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2183 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2184 auto *FnTy =
2185 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2186 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2187 break;
2188 }
2189 case OMPRTL__kmpc_doacross_post: {
2190 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2191 // *vec);
2192 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2193 CGM.Int64Ty->getPointerTo(DefaultAS)};
2194 auto *FnTy =
2195 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2196 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2197 break;
2198 }
2199 case OMPRTL__kmpc_doacross_wait: {
2200 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2201 // *vec);
2202 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2203 CGM.Int64Ty->getPointerTo(DefaultAS)};
2204 auto *FnTy =
2205 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2206 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2207 break;
2208 }
2209 case OMPRTL__kmpc_task_reduction_init: {
2210 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2211 // *data);
2212 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2213 auto *FnTy =
2214 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2215 RTLFn =
2216 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2217 break;
2218 }
2219 case OMPRTL__kmpc_task_reduction_get_th_data: {
2220 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2221 // *d);
2222 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2223 auto *FnTy =
2224 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2225 RTLFn = CGM.CreateRuntimeFunction(
2226 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2227 break;
2228 }
2229 case OMPRTL__kmpc_alloc: {
2230 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2231 // al); omp_allocator_handle_t type is void *.
2232 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2233 auto *FnTy =
2234 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2235 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2236 break;
2237 }
2238 case OMPRTL__kmpc_free: {
2239 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2240 // al); omp_allocator_handle_t type is void *.
2241 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2242 auto *FnTy =
2243 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2244 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2245 break;
2246 }
2247 case OMPRTL__kmpc_push_target_tripcount: {
2248 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2249 // size);
2250 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2251 llvm::FunctionType *FnTy =
2252 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2253 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2254 break;
2255 }
2256 case OMPRTL__tgt_target: {
2257 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2258 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2259 // *arg_types);
2260 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2261 CGM.VoidPtrTy,
2262 CGM.Int32Ty,
2263 CGM.VoidPtrPtrTy,
2264 CGM.VoidPtrPtrTy,
2265 CGM.SizeTy->getPointerTo(DefaultAS),
2266 CGM.Int64Ty->getPointerTo(DefaultAS)};
2267 auto *FnTy =
2268 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2269 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2270 break;
2271 }
2272 case OMPRTL__tgt_target_nowait: {
2273 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2274 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2275 // int64_t *arg_types);
2276 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2277 CGM.VoidPtrTy,
2278 CGM.Int32Ty,
2279 CGM.VoidPtrPtrTy,
2280 CGM.VoidPtrPtrTy,
2281 CGM.SizeTy->getPointerTo(),
2282 CGM.Int64Ty->getPointerTo()};
2283 auto *FnTy =
2284 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2285 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2286 break;
2287 }
2288 case OMPRTL__tgt_target_teams: {
2289 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2290 // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2291 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2292 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2293 CGM.VoidPtrTy,
2294 CGM.Int32Ty,
2295 CGM.VoidPtrPtrTy,
2296 CGM.VoidPtrPtrTy,
2297 CGM.SizeTy->getPointerTo(DefaultAS),
2298 CGM.Int64Ty->getPointerTo(DefaultAS),
2299 CGM.Int32Ty,
2300 CGM.Int32Ty};
2301 auto *FnTy =
2302 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2303 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2304 break;
2305 }
2306 case OMPRTL__tgt_target_teams_nowait: {
2307 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2308 // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2309 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2310 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2311 CGM.VoidPtrTy,
2312 CGM.Int32Ty,
2313 CGM.VoidPtrPtrTy,
2314 CGM.VoidPtrPtrTy,
2315 CGM.SizeTy->getPointerTo(),
2316 CGM.Int64Ty->getPointerTo(),
2317 CGM.Int32Ty,
2318 CGM.Int32Ty};
2319 auto *FnTy =
2320 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2321 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2322 break;
2323 }
2324 case OMPRTL__tgt_register_requires: {
2325 // Build void __tgt_register_requires(int64_t flags);
2326 llvm::Type *TypeParams[] = {CGM.Int64Ty};
2327 auto *FnTy =
2328 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2329 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2330 break;
2331 }
2332 case OMPRTL__tgt_register_lib: {
2333 // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2334 QualType ParamTy =
2335 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2336 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2337 auto *FnTy =
2338 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2339 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2340 break;
2341 }
2342 case OMPRTL__tgt_unregister_lib: {
2343 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2344 QualType ParamTy =
2345 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2346 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2347 auto *FnTy =
2348 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2349 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2350 break;
2351 }
2352 case OMPRTL__tgt_target_data_begin: {
2353 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2354 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2355 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2356 CGM.Int32Ty,
2357 CGM.VoidPtrPtrTy,
2358 CGM.VoidPtrPtrTy,
2359 CGM.SizeTy->getPointerTo(DefaultAS),
2360 CGM.Int64Ty->getPointerTo(DefaultAS)};
2361 auto *FnTy =
2362 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2363 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2364 break;
2365 }
2366 case OMPRTL__tgt_target_data_begin_nowait: {
2367 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2368 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2369 // *arg_types);
2370 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2371 CGM.Int32Ty,
2372 CGM.VoidPtrPtrTy,
2373 CGM.VoidPtrPtrTy,
2374 CGM.SizeTy->getPointerTo(),
2375 CGM.Int64Ty->getPointerTo()};
2376 auto *FnTy =
2377 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2378 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2379 break;
2380 }
2381 case OMPRTL__tgt_target_data_end: {
2382 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2383 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2384 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2385 CGM.Int32Ty,
2386 CGM.VoidPtrPtrTy,
2387 CGM.VoidPtrPtrTy,
2388 CGM.SizeTy->getPointerTo(DefaultAS),
2389 CGM.Int64Ty->getPointerTo(DefaultAS)};
2390 auto *FnTy =
2391 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2392 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2393 break;
2394 }
2395 case OMPRTL__tgt_target_data_end_nowait: {
2396 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2397 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2398 // *arg_types);
2399 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2400 CGM.Int32Ty,
2401 CGM.VoidPtrPtrTy,
2402 CGM.VoidPtrPtrTy,
2403 CGM.SizeTy->getPointerTo(),
2404 CGM.Int64Ty->getPointerTo()};
2405 auto *FnTy =
2406 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2407 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2408 break;
2409 }
2410 case OMPRTL__tgt_target_data_update: {
2411 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2412 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2413 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2414 CGM.Int32Ty,
2415 CGM.VoidPtrPtrTy,
2416 CGM.VoidPtrPtrTy,
2417 CGM.SizeTy->getPointerTo(DefaultAS),
2418 CGM.Int64Ty->getPointerTo(DefaultAS)};
2419 auto *FnTy =
2420 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2421 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2422 break;
2423 }
2424 case OMPRTL__tgt_target_data_update_nowait: {
2425 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2426 // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2427 // *arg_types);
2428 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2429 CGM.Int32Ty,
2430 CGM.VoidPtrPtrTy,
2431 CGM.VoidPtrPtrTy,
2432 CGM.SizeTy->getPointerTo(),
2433 CGM.Int64Ty->getPointerTo()};
2434 auto *FnTy =
2435 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2436 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2437 break;
2438 }
2439 }
2440 assert(RTLFn && "Unable to find OpenMP runtime function");
2441 return RTLFn;
2442}
2443
2444llvm::FunctionCallee
2445CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2446 assert((IVSize == 32 || IVSize == 64) &&
2447 "IV size is not compatible with the omp runtime");
2448 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2449 : "__kmpc_for_static_init_4u")
2450 : (IVSigned ? "__kmpc_for_static_init_8"
2451 : "__kmpc_for_static_init_8u");
2452 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2453 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2454 llvm::Type *TypeParams[] = {
2455 getIdentTyPointerTy(), // loc
2456 CGM.Int32Ty, // tid
2457 CGM.Int32Ty, // schedtype
2458 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2459 PtrTy, // p_lower
2460 PtrTy, // p_upper
2461 PtrTy, // p_stride
2462 ITy, // incr
2463 ITy // chunk
2464 };
2465 auto *FnTy =
2466 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2467 return CGM.CreateRuntimeFunction(FnTy, Name);
2468}
2469
2470llvm::FunctionCallee
2471CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2472 assert((IVSize == 32 || IVSize == 64) &&
2473 "IV size is not compatible with the omp runtime");
2474 StringRef Name =
2475 IVSize == 32
2476 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2477 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2478 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2479 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2480 CGM.Int32Ty, // tid
2481 CGM.Int32Ty, // schedtype
2482 ITy, // lower
2483 ITy, // upper
2484 ITy, // stride
2485 ITy // chunk
2486 };
2487 auto *FnTy =
2488 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2489 return CGM.CreateRuntimeFunction(FnTy, Name);
2490}
2491
2492llvm::FunctionCallee
2493CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2494 assert((IVSize == 32 || IVSize == 64) &&
2495 "IV size is not compatible with the omp runtime");
2496 StringRef Name =
2497 IVSize == 32
2498 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2499 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2500 llvm::Type *TypeParams[] = {
2501 getIdentTyPointerTy(), // loc
2502 CGM.Int32Ty, // tid
2503 };
2504 auto *FnTy =
2505 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2506 return CGM.CreateRuntimeFunction(FnTy, Name);
2507}
2508
2509llvm::FunctionCallee
2510CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2511 assert((IVSize == 32 || IVSize == 64) &&
2512 "IV size is not compatible with the omp runtime");
2513 StringRef Name =
2514 IVSize == 32
2515 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2516 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2517 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2518 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2519 llvm::Type *TypeParams[] = {
2520 getIdentTyPointerTy(), // loc
2521 CGM.Int32Ty, // tid
2522 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2523 PtrTy, // p_lower
2524 PtrTy, // p_upper
2525 PtrTy // p_stride
2526 };
2527 auto *FnTy =
2528 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2529 return CGM.CreateRuntimeFunction(FnTy, Name);
2530}
2531
/// Return the address of the indirection pointer used to reference a
/// 'declare target link' variable, creating that pointer global on first use.
/// Returns an invalid Address for OpenMP-simd-only mode or for variables that
/// are not 'declare target link'.
Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
  // With -fopenmp-simd no offloading machinery is emitted, so there is no
  // link pointer to hand back.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
    // The pointer global is named "<mangled-name>_decl_tgt_link_ptr".
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);
      // On the host, the pointer is externally visible and statically
      // initialized with the address of the original variable; on the device
      // the initializer is omitted here (presumably filled in by the
      // offloading machinery — NOTE(review): confirm against the registration
      // code).
      if (!CGM.getLangOpts().OpenMPIsDevice) {
        auto *GV = cast<llvm::GlobalVariable>(Ptr);
        GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      }
      // Keep the pointer alive (llvm.used) and record it in the offload
      // entries table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2560
2561llvm::Constant *
2562CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2563 assert(!CGM.getLangOpts().OpenMPUseTLS ||
2564 !CGM.getContext().getTargetInfo().isTLSSupported());
2565 // Lookup the entry, lazily creating it if necessary.
2566 std::string Suffix = getName({"cache", ""});
2567 return getOrCreateInternalVariable(
2568 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2569}
2570
2571Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2572 const VarDecl *VD,
2573 Address VDAddr,
2574 SourceLocation Loc) {
2575 if (CGM.getLangOpts().OpenMPUseTLS &&
2576 CGM.getContext().getTargetInfo().isTLSSupported())
2577 return VDAddr;
2578
2579 llvm::Type *VarTy = VDAddr.getElementType();
2580 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2581 CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2582 CGM.Int8PtrTy),
2583 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2584 getOrCreateThreadPrivateCache(VD)};
2585 return Address(CGF.EmitRuntimeCall(
2586 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2587 VDAddr.getAlignment());
2588}
2589
2590void CGOpenMPRuntime::emitThreadPrivateVarInit(
2591 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2592 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2593 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2594 // library.
2595 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2596 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2597 OMPLoc);
2598 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2599 // to register constructor/destructor for variable.
2600 llvm::Value *Args[] = {
2601 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2602 Ctor, CopyCtor, Dtor};
2603 CGF.EmitRuntimeCall(
2604 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2605}
2606
/// Emit, if needed, the helper functions (ctor/dtor) for a threadprivate
/// variable and register them with the OpenMP runtime. Returns the emitted
/// initialization function when no CodeGenFunction was supplied (so the
/// caller can schedule it), nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS handles everything; nothing to register.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only process a definition, and only once per mangled name.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD.
      // Signature: void *ctor(void *dst) — takes the per-thread copy's
      // address, constructs into it, and returns it.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the incoming destination pointer and cast it to the variable's
      // memory type before emitting the initializer into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination pointer (reloaded) as the ctor's result.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD.
      // Signature: void dtor(void *obj).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo(DefaultAS);
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are passed to the runtime as typed null
    // function pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo(DefaultAS);
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo(DefaultAS);
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: synthesize a dedicated
      // __omp_threadprivate_init_ function that performs the registration and
      // hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the supplied function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2727
2728/// Obtain information that uniquely identifies a target entry. This
2729/// consists of the file and device IDs as well as line number associated with
2730/// the relevant entry source location.
2731static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2732 unsigned &DeviceID, unsigned &FileID,
2733 unsigned &LineNum) {
2734 SourceManager &SM = C.getSourceManager();
2735
2736 // The loc should be always valid and have a file ID (the user cannot use
2737 // #pragma directives in macros)
2738
2739 assert(Loc.isValid() && "Source location is expected to be always valid.");
2740
2741 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2742 assert(PLoc.isValid() && "Source location is expected to be always valid.");
2743
2744 llvm::sys::fs::UniqueID ID;
2745 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2746 SM.getDiagnostics().Report(diag::err_cannot_open_file)
2747 << PLoc.getFilename() << EC.message();
2748
2749 DeviceID = ID.getDevice();
2750 FileID = ID.getFile();
2751 LineNum = PLoc.getLine();
2752}
2753
/// Emit (and register in the offload entries table) the ctor/dtor entries for
/// a 'declare target to' global variable. Returns OpenMPIsDevice, i.e. true
/// when compiling for the device.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Only variables mapped 'to' are handled here; 'link' variables and
  // non-declare-target variables need no ctor/dtor entries.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
    return CGM.getLangOpts().OpenMPIsDevice;
  // Process each definition at most once.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, only a placeholder byte is emitted; it serves as the
      // unique ID matching the device-side ctor entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder byte acting as the matching unique ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2861
2862Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2863 QualType VarType,
2864 StringRef Name) {
2865 std::string Suffix = getName({"artificial", ""});
2866 std::string CacheSuffix = getName({"cache", ""});
2867 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2868 llvm::Value *GAddr =
2869 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2870 llvm::Value *Args[] = {
2871 emitUpdateLocation(CGF, SourceLocation()),
2872 getThreadID(CGF, SourceLocation()),
2873 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2874 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2875 /*IsSigned=*/false),
2876 getOrCreateInternalVariable(
2877 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2878 return Address(
2879 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2880 CGF.EmitRuntimeCall(
2881 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2882 VarLVType->getPointerTo(/*AddrSpace=*/0)),
2883 CGM.getPointerAlign());
2884}
2885
2886void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2887 const RegionCodeGenTy &ThenGen,
2888 const RegionCodeGenTy &ElseGen) {
2889 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2890
2891 // If the condition constant folds and can be elided, try to avoid emitting
2892 // the condition and the dead arm of the if/else.
2893 bool CondConstant;
2894 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2895 if (CondConstant)
2896 ThenGen(CGF);
2897 else
2898 ElseGen(CGF);
2899 return;
2900 }
2901
2902 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2903 // emit the conditional branch.
2904 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2905 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2906 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2907 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2908
2909 // Emit the 'then' code.
2910 CGF.EmitBlock(ThenBlock);
2911 ThenGen(CGF);
2912 CGF.EmitBranch(ContBlock);
2913 // Emit the 'else' code if present.
2914 // There is no need to emit line number for unconditional branch.
2915 (void)ApplyDebugLocation::CreateEmpty(CGF);
2916 CGF.EmitBlock(ElseBlock);
2917 ElseGen(CGF);
2918 // There is no need to emit line number for unconditional branch.
2919 (void)ApplyDebugLocation::CreateEmpty(CGF);
2920 CGF.EmitBranch(ContBlock);
2921 // Emit the continuation block for code after the if.
2922 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2923}
2924
/// Emit code for a 'parallel' region: either fork via the runtime
/// (__kmpc_fork_call) or, when the 'if' clause evaluates false, run the
/// outlined function serialized on the current thread.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path: fork with the outlined function as the microtask.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: bracket a direct call to the outlined function with
  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an 'if' clause, choose between the two at runtime (or fold);
  // otherwise always fork.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2982
2983// If we're inside an (outlined) parallel region, use the region info's
2984// thread-ID variable (it is passed in a first argument of the outlined function
2985// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2986// regular serial code region, get thread ID by calling kmp_int32
2987// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2988// return the address of that temp.
2989Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2990 SourceLocation Loc) {
2991 if (auto *OMPRegionInfo =
2992 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2993 if (OMPRegionInfo->getThreadIDVariable())
2994 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2995
2996 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2997 QualType Int32Ty =
2998 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2999 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3000 CGF.EmitStoreOfScalar(ThreadID,
3001 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3002
3003 return ThreadIDTemp;
3004}
3005
3006llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3007 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3008 SmallString<256> Buffer;
3009 llvm::raw_svector_ostream Out(Buffer);
3010 Out << Name;
3011 StringRef RuntimeName = Out.str();
3012 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3013 if (Elem.second) {
3014 assert(Elem.second->getType()->getPointerElementType() == Ty &&
3015 "OMP internal variable has different type than requested");
3016 return &*Elem.second;
3017 }
3018
3019 return Elem.second = new llvm::GlobalVariable(
3020 CGM.getModule(), Ty, /*IsConstant*/ false,
3021 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3022 Elem.first(), /*InsertBefore=*/nullptr,
3023 llvm::GlobalValue::NotThreadLocal, AddressSpace);
3024}
3025
3026llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3027 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3028 std::string Name = getName({Prefix, "var"});
3029 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3030}
3031
3032namespace {
3033/// Common pre(post)-action for different OpenMP constructs.
3034class CommonActionTy final : public PrePostActionTy {
3035 llvm::FunctionCallee EnterCallee;
3036 ArrayRef<llvm::Value *> EnterArgs;
3037 llvm::FunctionCallee ExitCallee;
3038 ArrayRef<llvm::Value *> ExitArgs;
3039 bool Conditional;
3040 llvm::BasicBlock *ContBlock = nullptr;
3041
3042public:
3043 CommonActionTy(llvm::FunctionCallee EnterCallee,
3044 ArrayRef<llvm::Value *> EnterArgs,
3045 llvm::FunctionCallee ExitCallee,
3046 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3047 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3048 ExitArgs(ExitArgs), Conditional(Conditional) {}
3049 void Enter(CodeGenFunction &CGF) override {
3050 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3051 if (Conditional) {
3052 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3053 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3054 ContBlock = CGF.createBasicBlock("omp_if.end");
3055 // Generate the branch (If-stmt)
3056 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3057 CGF.EmitBlock(ThenBlock);
3058 }
3059 }
3060 void Done(CodeGenFunction &CGF) {
3061 // Emit the rest of blocks/branches
3062 CGF.EmitBranch(ContBlock);
3063 CGF.EmitBlock(ContBlock, true);
3064 }
3065 void Exit(CodeGenFunction &CGF) override {
3066 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3067 }
3068};
3069} // anonymous namespace
3070
3071void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3072 StringRef CriticalName,
3073 const RegionCodeGenTy &CriticalOpGen,
3074 SourceLocation Loc, const Expr *Hint) {
3075 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3076 // CriticalOpGen();
3077 // __kmpc_end_critical(ident_t *, gtid, Lock);
3078 // Prepare arguments and build a call to __kmpc_critical
3079 if (!CGF.HaveInsertPoint())
3080 return;
3081 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3082 getCriticalRegionLock(CriticalName)};
3083 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3084 std::end(Args));
3085 if (Hint) {
3086 EnterArgs.push_back(CGF.Builder.CreateIntCast(
3087 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3088 }
3089 CommonActionTy Action(
3090 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3091 : OMPRTL__kmpc_critical),
3092 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3093 CriticalOpGen.setAction(Action);
3094 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3095}
3096
3097void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3098 const RegionCodeGenTy &MasterOpGen,
3099 SourceLocation Loc) {
3100 if (!CGF.HaveInsertPoint())
3101 return;
3102 // if(__kmpc_master(ident_t *, gtid)) {
3103 // MasterOpGen();
3104 // __kmpc_end_master(ident_t *, gtid);
3105 // }
3106 // Prepare arguments and build a call to __kmpc_master
3107 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3108 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3109 createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3110 /*Conditional=*/true);
3111 MasterOpGen.setAction(Action);
3112 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3113 Action.Done(CGF);
3114}
3115
3116void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3117 SourceLocation Loc) {
3118 if (!CGF.HaveInsertPoint())
3119 return;
3120 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3121 llvm::Value *Args[] = {
3122 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3123 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3124 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3125 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3126 Region->emitUntiedSwitch(CGF);
3127}
3128
3129void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3130 const RegionCodeGenTy &TaskgroupOpGen,
3131 SourceLocation Loc) {
3132 if (!CGF.HaveInsertPoint())
3133 return;
3134 // __kmpc_taskgroup(ident_t *, gtid);
3135 // TaskgroupOpGen();
3136 // __kmpc_end_taskgroup(ident_t *, gtid);
3137 // Prepare arguments and build a call to __kmpc_taskgroup
3138 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3139 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3140 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3141 Args);
3142 TaskgroupOpGen.setAction(Action);
3143 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3144}
3145
3146/// Given an array of pointers to variables, project the address of a
3147/// given variable.
3148static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3149 unsigned Index, const VarDecl *Var) {
3150 // Pull out the pointer to the variable.
3151 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3152 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3153
3154 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3155 Addr = CGF.Builder.CreateElementBitCast(
3156 Addr, CGF.ConvertTypeForMem(Var->getType()));
3157 return Addr;
3158}
3159
/// Emit the helper ".omp.copyprivate.copy_func" with signature
///   void copy_func(void *LHSArg, void *RHSArg)
/// where both arguments point to arrays of 'void *' slots (one per
/// copyprivate variable, type ArgsType) and the body performs an
/// element-wise copy using the user-level assignment expressions.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Generate the helper body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the raw void* parameters as pointers to the void*[n] arrays.
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Use the semantics of the original assignment expression (e.g. copy
    // assignment operator for class types).
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3213
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one src/dst/assignment per
  // copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // Records whether this thread executed the single region; the runtime
    // uses it to pick whose data is broadcast.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional 'then' block)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region opened by __kmpc_single.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill each slot with the (void*-cast) address of a copyprivate var.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(DefaultAS),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3295
3296void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3297 const RegionCodeGenTy &OrderedOpGen,
3298 SourceLocation Loc, bool IsThreads) {
3299 if (!CGF.HaveInsertPoint())
3300 return;
3301 // __kmpc_ordered(ident_t *, gtid);
3302 // OrderedOpGen();
3303 // __kmpc_end_ordered(ident_t *, gtid);
3304 // Prepare arguments and build a call to __kmpc_ordered
3305 if (IsThreads) {
3306 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3307 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3308 createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3309 Args);
3310 OrderedOpGen.setAction(Action);
3311 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3312 return;
3313 }
3314 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3315}
3316
3317unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3318 unsigned Flags;
3319 if (Kind == OMPD_for)
3320 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3321 else if (Kind == OMPD_sections)
3322 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3323 else if (Kind == OMPD_single)
3324 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3325 else if (Kind == OMPD_barrier)
3326 Flags = OMP_IDENT_BARRIER_EXPL;
3327 else
3328 Flags = OMP_IDENT_BARRIER_IMPL;
3329 return Flags;
3330}
3331
3332void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3333 CodeGenFunction &CGF, const OMPLoopDirective &S,
3334 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3335 // Check if the loop directive is actually a doacross loop directive. In this
3336 // case choose static, 1 schedule.
3337 if (llvm::any_of(
3338 S.getClausesOfKind<OMPOrderedClause>(),
3339 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3340 ScheduleKind = OMPC_SCHEDULE_static;
3341 // Chunk size is 1 in this case.
3342 llvm::APInt ChunkSize(32, 1);
3343 ChunkExpr = IntegerLiteral::Create(
3344 CGF.getContext(), ChunkSize,
3345 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3346 SourceLocation());
3347 }
3348}
3349
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The barrier kind is encoded into the ident_t flags.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Inside a cancellable region the cancellation variant must be used so
    // cancelled threads can exit the construct at the barrier.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        // A non-zero return value indicates cancellation was observed.
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3387
3388/// Map the OpenMP loop schedule to the runtime enumeration.
3389static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3390 bool Chunked, bool Ordered) {
3391 switch (ScheduleKind) {
3392 case OMPC_SCHEDULE_static:
3393 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3394 : (Ordered ? OMP_ord_static : OMP_sch_static);
3395 case OMPC_SCHEDULE_dynamic:
3396 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3397 case OMPC_SCHEDULE_guided:
3398 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3399 case OMPC_SCHEDULE_runtime:
3400 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3401 case OMPC_SCHEDULE_auto:
3402 return Ordered ? OMP_ord_auto : OMP_sch_auto;
3403 case OMPC_SCHEDULE_unknown:
3404 assert(!Chunked && "chunk was specified but schedule kind not known");
3405 return Ordered ? OMP_ord_static : OMP_sch_static;
3406 }
3407 llvm_unreachable("Unexpected runtime schedule");
3408}
3409
3410/// Map the OpenMP distribute schedule to the runtime enumeration.
3411static OpenMPSchedType
3412getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3413 // only static is allowed for dist_schedule
3414 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3415}
3416
3417bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3418 bool Chunked) const {
3419 OpenMPSchedType Schedule =
3420 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3421 return Schedule == OMP_sch_static;
3422}
3423
3424bool CGOpenMPRuntime::isStaticNonchunked(
3425 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3426 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3427 return Schedule == OMP_dist_sch_static;
3428}
3429
3430bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3431 bool Chunked) const {
3432 OpenMPSchedType Schedule =
3433 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3434 return Schedule == OMP_sch_static_chunked;
3435}
3436
3437bool CGOpenMPRuntime::isStaticChunked(
3438 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3439 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3440 return Schedule == OMP_dist_sch_static_chunked;
3441}
3442
3443bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3444 OpenMPSchedType Schedule =
3445 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3446 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3447 return Schedule != OMP_sch_static;
3448}
3449
3450static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3451 OpenMPScheduleClauseModifier M1,
3452 OpenMPScheduleClauseModifier M2) {
3453 int Modifier = 0;
3454 switch (M1) {
3455 case OMPC_SCHEDULE_MODIFIER_monotonic:
3456 Modifier = OMP_sch_modifier_monotonic;
3457 break;
3458 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3459 Modifier = OMP_sch_modifier_nonmonotonic;
3460 break;
3461 case OMPC_SCHEDULE_MODIFIER_simd:
3462 if (Schedule == OMP_sch_static_chunked)
3463 Schedule = OMP_sch_static_balanced_chunked;
3464 break;
3465 case OMPC_SCHEDULE_MODIFIER_last:
3466 case OMPC_SCHEDULE_MODIFIER_unknown:
3467 break;
3468 }
3469 switch (M2) {
3470 case OMPC_SCHEDULE_MODIFIER_monotonic:
3471 Modifier = OMP_sch_modifier_monotonic;
3472 break;
3473 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3474 Modifier = OMP_sch_modifier_nonmonotonic;
3475 break;
3476 case OMPC_SCHEDULE_MODIFIER_simd:
3477 if (Schedule == OMP_sch_static_chunked)
3478 Schedule = OMP_sch_static_balanced_chunked;
3479 break;
3480 case OMPC_SCHEDULE_MODIFIER_last:
3481 case OMPC_SCHEDULE_MODIFIER_unknown:
3482 break;
3483 }
3484 return Schedule | Modifier;
3485}
3486
3487void CGOpenMPRuntime::emitForDispatchInit(
3488 CodeGenFunction &CGF, SourceLocation Loc,
3489 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3490 bool Ordered, const DispatchRTInput &DispatchValues) {
3491 if (!CGF.HaveInsertPoint())
3492 return;
3493 OpenMPSchedType Schedule = getRuntimeSchedule(
3494 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3495 assert(Ordered ||
3496 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3497 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3498 Schedule != OMP_sch_static_balanced_chunked));
3499 // Call __kmpc_dispatch_init(
3500 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3501 // kmp_int[32|64] lower, kmp_int[32|64] upper,
3502 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3503
3504 // If the Chunk was not specified in the clause - use default value 1.
3505 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3506 : CGF.Builder.getIntN(IVSize, 1);
3507 llvm::Value *Args[] = {
3508 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3509 CGF.Builder.getInt32(addMonoNonMonoModifier(
3510 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3511 DispatchValues.LB, // Lower
3512 DispatchValues.UB, // Upper
3513 CGF.Builder.getIntN(IVSize, 1), // Stride
3514 Chunk // Chunk
3515 };
3516 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3517}
3518
/// Emit the actual __kmpc_for_static_init_* call for a statically scheduled
/// worksharing/distribute loop, after the caller has resolved the update
/// location, thread id, runtime function and schedule encoding.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Only the static family of schedules may reach this helper.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only valid for the non-chunked static schedules.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3567
3568void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3569 SourceLocation Loc,
3570 OpenMPDirectiveKind DKind,
3571 const OpenMPScheduleTy &ScheduleKind,
3572 const StaticRTInput &Values) {
3573 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3574 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3575 assert(isOpenMPWorksharingDirective(DKind) &&
3576 "Expected loop-based or sections-based directive.");
3577 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3578 isOpenMPLoopDirective(DKind)
3579 ? OMP_IDENT_WORK_LOOP
3580 : OMP_IDENT_WORK_SECTIONS);
3581 llvm::Value *ThreadId = getThreadID(CGF, Loc);
3582 llvm::FunctionCallee StaticInitFunction =
3583 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3584 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3585 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3586}
3587
3588void CGOpenMPRuntime::emitDistributeStaticInit(
3589 CodeGenFunction &CGF, SourceLocation Loc,
3590 OpenMPDistScheduleClauseKind SchedKind,
3591 const CGOpenMPRuntime::StaticRTInput &Values) {
3592 OpenMPSchedType ScheduleNum =
3593 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3594 llvm::Value *UpdatedLocation =
3595 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3596 llvm::Value *ThreadId = getThreadID(CGF, Loc);
3597 llvm::FunctionCallee StaticInitFunction =
3598 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3599 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3600 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3601 OMPC_SCHEDULE_MODIFIER_unknown, Values);
3602}
3603
3604void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3605 SourceLocation Loc,
3606 OpenMPDirectiveKind DKind) {
3607 if (!CGF.HaveInsertPoint())
3608 return;
3609 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3610 llvm::Value *Args[] = {
3611 emitUpdateLocation(CGF, Loc,
3612 isOpenMPDistributeDirective(DKind)
3613 ? OMP_IDENT_WORK_DISTRIBUTE
3614 : isOpenMPLoopDirective(DKind)
3615 ? OMP_IDENT_WORK_LOOP
3616 : OMP_IDENT_WORK_SECTIONS),
3617 getThreadID(CGF, Loc)};
3618 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3619 Args);
3620}
3621
3622void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3623 SourceLocation Loc,
3624 unsigned IVSize,
3625 bool IVSigned) {
3626 if (!CGF.HaveInsertPoint())
3627 return;
3628 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3629 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3630 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3631}
3632
3633llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3634 SourceLocation Loc, unsigned IVSize,
3635 bool IVSigned, Address IL,
3636 Address LB, Address UB,
3637 Address ST) {
3638 // Call __kmpc_dispatch_next(
3639 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3640 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3641 // kmp_int[32|64] *p_stride);
3642 llvm::Value *Args[] = {
3643 emitUpdateLocation(CGF, Loc),
3644 getThreadID(CGF, Loc),
3645 IL.getPointer(), // &isLastIter
3646 LB.getPointer(), // &Lower
3647 UB.getPointer(), // &Upper
3648 ST.getPointer() // &Stride
3649 };
3650 llvm::Value *Call =
3651 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3652 return CGF.EmitScalarConversion(
3653 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3654 CGF.getContext().BoolTy, Loc);
3655}
3656
3657void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3658 llvm::Value *NumThreads,
3659 SourceLocation Loc) {
3660 if (!CGF.HaveInsertPoint())
3661 return;
3662 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3663 llvm::Value *Args[] = {
3664 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3665 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3666 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3667 Args);
3668}
3669
3670void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3671 OpenMPProcBindClauseKind ProcBind,
3672 SourceLocation Loc) {
3673 if (!CGF.HaveInsertPoint())
3674 return;
3675 // Constants for proc bind value accepted by the runtime.
3676 enum ProcBindTy {
3677 ProcBindFalse = 0,
3678 ProcBindTrue,
3679 ProcBindMaster,
3680 ProcBindClose,
3681 ProcBindSpread,
3682 ProcBindIntel,
3683 ProcBindDefault
3684 } RuntimeProcBind;
3685 switch (ProcBind) {
3686 case OMPC_PROC_BIND_master:
3687 RuntimeProcBind = ProcBindMaster;
3688 break;
3689 case OMPC_PROC_BIND_close:
3690 RuntimeProcBind = ProcBindClose;
3691 break;
3692 case OMPC_PROC_BIND_spread:
3693 RuntimeProcBind = ProcBindSpread;
3694 break;
3695 case OMPC_PROC_BIND_unknown:
3696 llvm_unreachable("Unsupported proc_bind value.");
3697 }
3698 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3699 llvm::Value *Args[] = {
3700 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3701 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3702 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3703}
3704
3705void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3706 SourceLocation Loc) {
3707 if (!CGF.HaveInsertPoint())
3708 return;
3709 // Build call void __kmpc_flush(ident_t *loc)
3710 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3711 emitUpdateLocation(CGF, Loc));
3712}
3713
namespace {
/// Indexes of fields for type kmp_task_t, the task descriptor shared with
/// the OpenMP runtime.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3739
3740bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3741 return OffloadEntriesTargetRegion.empty() &&
3742 OffloadEntriesDeviceGlobalVar.empty();
3743}
3744
3745/// Initialize target region entry.
3746void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3747 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3748 StringRef ParentName, unsigned LineNum,
3749 unsigned Order) {
3750 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3751 "only required for the device "
3752 "code generation.");
3753 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3754 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3755 OMPTargetRegionEntryTargetRegion);
3756 ++OffloadingEntriesNum;
3757}
3758
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // A missing pre-initialized entry means host and device code are out of
    // sync; report an error instead of asserting.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    // Fill in the address/ID/flags on the pre-initialized entry.
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host compilation: create the entry now and assign the next ordinal.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3786
3787bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3788 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3789 unsigned LineNum) const {
3790 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3791 if (PerDevice == OffloadEntriesTargetRegion.end())
3792 return false;
3793 auto PerFile = PerDevice->second.find(FileID);
3794 if (PerFile == PerDevice->second.end())
3795 return false;
3796 auto PerParentName = PerFile->second.find(ParentName);
3797 if (PerParentName == PerFile->second.end())
3798 return false;
3799 auto PerLine = PerParentName->second.find(LineNum);
3800 if (PerLine == PerParentName->second.end())
3801 return false;
3802 // Fail if this entry is already registered.
3803 if (PerLine->second.getAddress() || PerLine->second.getID())
3804 return false;
3805 return true;
3806}
3807
3808void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3809 const OffloadTargetRegionEntryInfoActTy &Action) {
3810 // Scan all target region entries and perform the provided action.
3811 for (const auto &D : OffloadEntriesTargetRegion)
3812 for (const auto &F : D.second)
3813 for (const auto &P : F.second)
3814 for (const auto &L : P.second)
3815 Action(D.first, F.first, P.first(), L.first, L.second);
3816}
3817
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Record a placeholder entry; address, size and linkage are supplied later
  // by registerDeviceGlobalVarEntryInfo.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3828
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device compilation: the entry was pre-initialized from the host
    // metadata; fill in address/size/linkage now.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already registered with an address: only upgrade a zero size.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host compilation: update an existing entry if present...
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // ...otherwise create a new entry with the next ordinal.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3868
3869void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3870 actOnDeviceGlobalVarEntriesInfo(
3871 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3872 // Scan all target region entries and perform the provided action.
3873 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3874 Action(E.getKey(), E.getValue());
3875}
3876
/// Build the host-side machinery that registers the offloading binary
/// descriptor with the offload runtime (__tgt_register_lib) at program
/// startup and unregisters it (__tgt_unregister_lib) at shutdown.
/// Returns the registration function, or nullptr when nothing is needed.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about.
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      EntriesBeginName);
  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd =
      new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
                               llvm::GlobalValue::ExternalLinkage,
                               /*Initializer=*/nullptr, EntriesEndName);

  // Create one __tgt_device_image record per offload target.
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);

  for (const llvm::Triple &Device : Devices) {
    StringRef T = Device.getTriple();
    // Per-triple image start/end markers; emitted extern_weak since they are
    // defined externally (presumably by the offload linker/wrapper — confirm
    // against the toolchain driver).
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));

    // __tgt_device_image layout: {ImageStart, ImageEnd, EntriesBegin,
    // EntriesEnd}.
    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }

  // Create device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Zero indices used to build the GEP to the first device image below.
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the binary descriptor: {NumDevices, &DeviceImages[0],
  // EntriesBegin, EntriesEnd}.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  llvm::Function *UnRegFn;
  {
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);

    // Encode offload target triples into the registration function name. It
    // will serve as a comdat key for the registration/unregistration code for
    // this particular combination of offloading targets.
    SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
    RegFnNameParts[0] = "omp_offloading";
    RegFnNameParts[1] = "descriptor_reg";
    llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
                    [](const llvm::Triple &T) -> const std::string& {
                      return T.getTriple();
                    });
    // Sort the triple parts so the name is canonical regardless of the order
    // in which the targets were specified.
    llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
    std::string Descriptor = getName(RegFnNameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}
4027
4028void CGOpenMPRuntime::createOffloadEntry(
4029 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4030 llvm::GlobalValue::LinkageTypes Linkage) {
4031 StringRef Name = Addr->getName();
4032 llvm::Module &M = CGM.getModule();
4033 llvm::LLVMContext &C = M.getContext();
4034
4035 // Create constant string with the name.
4036 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4037
4038 std::string StringName = getName({"omp_offloading", "entry_name"});
4039 auto *Str = new llvm::GlobalVariable(
4040 M, StrPtrInit->getType(), /*isConstant=*/true,
4041 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4042 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4043
4044 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4045 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4046 llvm::ConstantInt::get(CGM.SizeTy, Size),
4047 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4048 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4049 std::string EntryName = getName({"omp_offloading", "entry", ""});
4050 llvm::GlobalVariable *Entry = createGlobalStruct(
4051 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4052 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4053
4054 // The entry has to be created in the section the linker expects it to be.
4055 std::string Section = getName({"omp_offloading", "entries"});
4056 Entry->setSection(Section);
4057}
4058
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by creation order; filled in by the emitter callbacks
  // below, then walked to emit the actual __tgt_offload_entry globals.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parallel array: for target-region entries, the mangled name of the
  // function containing the region (used to suppress diagnostics for
  // never-emitted parents).
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        // NOTE: this layout must match the reader in loadOffloadInfoMetadata().
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now emit the actual offload entries, diagnosing inconsistent ones.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // 'link' variables have an address only on the host side.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables, the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4213
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only the device pass needs the host-side entry table.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to do when no host IR file was provided.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a scratch context; only its named metadata is
  // consumed here.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Operand accessors; the operand layout must match the writer in
    // createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operands depend on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4282
4283void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4284 if (!KmpRoutineEntryPtrTy) {
4285 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4286 ASTContext &C = CGM.getContext();
4287 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4288 FunctionProtoType::ExtProtoInfo EPI;
4289 KmpRoutineEntryPtrQTy = C.getPointerType(
4290 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4291 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4292 }
4293}
4294
4295QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4296 // Make sure the type of the entry is already created. This is the type we
4297 // have to create:
4298 // struct __tgt_offload_entry{
4299 // void *addr; // Pointer to the offload entry info.
4300 // // (function or global)
4301 // char *name; // Name of the function or global.
4302 // size_t size; // Size of the entry info (0 if it a function).
4303 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4304 // int32_t reserved; // Reserved, to use by the runtime library.
4305 // };
4306 if (TgtOffloadEntryQTy.isNull()) {
4307 ASTContext &C = CGM.getContext();
4308 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4309 RD->startDefinition();
4310 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4311 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4312 addFieldToRecordDecl(C, RD, C.getSizeType());
4313 addFieldToRecordDecl(
4314 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4315 addFieldToRecordDecl(
4316 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4317 RD->completeDefinition();
4318 RD->addAttr(PackedAttr::CreateImplicit(C));
4319 TgtOffloadEntryQTy = C.getRecordType(RD);
4320 }
4321 return TgtOffloadEntryQTy;
4322}
4323
4324QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4325 // These are the types we need to build:
4326 // struct __tgt_device_image{
4327 // void *ImageStart; // Pointer to the target code start.
4328 // void *ImageEnd; // Pointer to the target code end.
4329 // // We also add the host entries to the device image, as it may be useful
4330 // // for the target runtime to have access to that information.
4331 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
4332 // // the entries.
4333 // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4334 // // entries (non inclusive).
4335 // };
4336 if (TgtDeviceImageQTy.isNull()) {
4337 ASTContext &C = CGM.getContext();
4338 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4339 RD->startDefinition();
4340 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4341 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4342 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4343 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4344 RD->completeDefinition();
4345 TgtDeviceImageQTy = C.getRecordType(RD);
4346 }
4347 return TgtDeviceImageQTy;
4348}
4349
4350QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4351 // struct __tgt_bin_desc{
4352 // int32_t NumDevices; // Number of devices supported.
4353 // __tgt_device_image *DeviceImages; // Arrays of device images
4354 // // (one per device).
4355 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
4356 // // entries.
4357 // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4358 // // entries (non inclusive).
4359 // };
4360 if (TgtBinaryDescriptorQTy.isNull()) {
4361 ASTContext &C = CGM.getContext();
4362 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4363 RD->startDefinition();
4364 addFieldToRecordDecl(
4365 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4366 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4367 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4368 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4369 RD->completeDefinition();
4370 TgtBinaryDescriptorQTy = C.getRecordType(RD);
4371 }
4372 return TgtBinaryDescriptorQTy;
4373}
4374
4375namespace {
4376struct PrivateHelpersTy {
4377 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4378 const VarDecl *PrivateElemInit)
4379 : Original(Original), PrivateCopy(PrivateCopy),
4380 PrivateElemInit(PrivateElemInit) {}
4381 const VarDecl *Original;
4382 const VarDecl *PrivateCopy;
4383 const VarDecl *PrivateElemInit;
4384};
4385typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4386} // anonymous namespace
4387
4388static RecordDecl *
4389createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4390 if (!Privates.empty()) {
4391 ASTContext &C = CGM.getContext();
4392 // Build struct .kmp_privates_t. {
4393 // /* private vars */
4394 // };
4395 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4396 RD->startDefinition();
4397 for (const auto &Pair : Privates) {
4398 const VarDecl *VD = Pair.second.Original;
4399 QualType Type = VD->getType().getNonReferenceType();
4400 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4401 if (VD->hasAttrs()) {
4402 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4403 E(VD->getAttrs().end());
4404 I != E; ++I)
4405 FD->addAttr(*I);
4406 }
4407 }
4408 RD->completeDefinition();
4409 return RD;
4410 }
4411 return nullptr;
4412}
4413
4414static RecordDecl *
4415createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4416 QualType KmpInt32Ty,
4417 QualType KmpRoutineEntryPointerQTy) {
4418 ASTContext &C = CGM.getContext();
4419 // Build struct kmp_task_t {
4420 // void * shareds;
4421 // kmp_routine_entry_t routine;
4422 // kmp_int32 part_id;
4423 // kmp_cmplrdata_t data1;
4424 // kmp_cmplrdata_t data2;
4425 // For taskloops additional fields:
4426 // kmp_uint64 lb;
4427 // kmp_uint64 ub;
4428 // kmp_int64 st;
4429 // kmp_int32 liter;
4430 // void * reductions;
4431 // };
4432 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4433 UD->startDefinition();
4434 addFieldToRecordDecl(C, UD, KmpInt32Ty);
4435 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4436 UD->completeDefinition();
4437 QualType KmpCmplrdataTy = C.getRecordType(UD);
4438 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4439 RD->startDefinition();
4440 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4441 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4442 addFieldToRecordDecl(C, RD, KmpInt32Ty);
4443 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4444 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4445 if (isOpenMPTaskLoopDirective(Kind)) {
4446 QualType KmpUInt64Ty =
4447 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4448 QualType KmpInt64Ty =
4449 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4450 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4451 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4452 addFieldToRecordDecl(C, RD, KmpInt64Ty);
4453 addFieldToRecordDecl(C, RD, KmpInt32Ty);
4454 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4455 }
4456 RD->completeDefinition();
4457 return RD;
4458}
4459
/// Build the record that wraps the runtime-visible kmp_task_t together with
/// the compiler-generated privates record.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t task_data;       // always the first field
  //   .kmp_privates_t. privates;  // omitted when there are no privates
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // createPrivatesRecordDecl returns null for an empty Privates list, in
  // which case only the task_data field is emitted.
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
4476
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // The proxy's signature matches the runtime's kmp_routine_entry_t:
  // (kmp_int32 gtid, kmp_task_t_with_privates *tt) -> kmp_int32.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase points at the whole kmp_task_t_with_privates; Base at its
  // task_data (kmp_task_t) first field.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the outlined function can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  // Cast the stored void* shareds pointer back to the captured-record type.
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field exists only when the task has privatized variables;
  // otherwise pass a null void*.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally forward lb/ub/st/liter/reductions by value.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime ignores the result; always return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4591
/// Emit a function with the kmp_routine_entry_t signature that destroys the
/// non-trivially-destructible fields of the task's privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same (gtid, task*) signature as the task entry so the runtime can invoke
  // it through kmp_routine_entry_t.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Navigate to the privates record (second field of
  // kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Register a cleanup for each field that needs destruction; the cleanups
    // are emitted when the function is finished.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4640
4641/// Emit a privates mapping function for correct handling of private and
4642/// firstprivate variables.
4643/// \code
4644/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4645/// **noalias priv1,..., <tyn> **noalias privn) {
4646/// *priv1 = &.privates.priv1;
4647/// ...;
4648/// *privn = &.privates.privn;
4649/// }
4650/// \endcode
4651static llvm::Value *
4652emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4653 ArrayRef<const Expr *> PrivateVars,
4654 ArrayRef<const Expr *> FirstprivateVars,
4655 ArrayRef<const Expr *> LastprivateVars,
4656 QualType PrivatesQTy,
4657 ArrayRef<PrivateDataTy> Privates) {
4658 ASTContext &C = CGM.getContext();
4659 FunctionArgList Args;
4660 ImplicitParamDecl TaskPrivatesArg(
4661 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4662 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4663 ImplicitParamDecl::Other);
4664 Args.push_back(&TaskPrivatesArg);
4665 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4666 unsigned Counter = 1;
4667 for (const Expr *E : PrivateVars) {
4668 Args.push_back(ImplicitParamDecl::Create(
4669 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4670 C.getPointerType(C.getPointerType(E->getType()))
4671 .withConst()
4672 .withRestrict(),
4673 ImplicitParamDecl::Other));
4674 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4675 PrivateVarsPos[VD] = Counter;
4676 ++Counter;
4677 }
4678 for (const Expr *E : FirstprivateVars) {
4679 Args.push_back(ImplicitParamDecl::Create(
4680 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4681 C.getPointerType(C.getPointerType(E->getType()))
4682 .withConst()
4683 .withRestrict(),
4684 ImplicitParamDecl::Other));
4685 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4686 PrivateVarsPos[VD] = Counter;
4687 ++Counter;
4688 }
4689 for (const Expr *E : LastprivateVars) {
4690 Args.push_back(ImplicitParamDecl::Create(
4691 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4692 C.getPointerType(C.getPointerType(E->getType()))
4693 .withConst()
4694 .withRestrict(),
4695 ImplicitParamDecl::Other));
4696 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4697 PrivateVarsPos[VD] = Counter;
4698 ++Counter;
4699 }
4700 const auto &TaskPrivatesMapFnInfo =
4701 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4702 llvm::FunctionType *TaskPrivatesMapTy =
4703 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4704 std::string Name =
4705 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4706 auto *TaskPrivatesMap = llvm::Function::Create(
4707 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4708 &CGM.getModule());
4709 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4710 TaskPrivatesMapFnInfo);
4711 if (CGM.getLangOpts().Optimize) {
4712 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4713 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4714 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4715 }
4716 CodeGenFunction CGF(CGM);
4717 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4718 TaskPrivatesMapFnInfo, Args, Loc, Loc);
4719
4720 // *privi = &.privates.privi;
4721 LValue Base = CGF.EmitLoadOfPointerLValue(
4722 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4723 TaskPrivatesArg.getType()->castAs<PointerType>());
4724 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4725 Counter = 0;
4726 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4727 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4728 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4729 LValue RefLVal =
4730 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4731 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4732 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4733 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4734 ++Counter;
4735 }
4736 CGF.FinishFunction();
4737 return TaskPrivatesMap;
4738}
4739
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block used as the copy
///        source for firstprivates (may be invalid when there are none).
/// \param TDBase LValue of the kmp_task_t_with_privates record to fill.
/// \param ForDup True when called from the task_dup function: in that mode
///        only non-trivial CXXConstructExpr initializers are (re)run.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Reuse FI to walk the fields of the privates record in lock-step with
  // the alignment-sorted Privates list.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In ForDup mode only re-run non-trivial constructor initializers;
    // trivially-initialized privates were already handled at task creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: copy/construct from the shared original.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          // Rebuild the lvalue with the original declaration's alignment.
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: map the init placeholder to the shared
          // address, then run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate: default-initialize the copy.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4844
4845/// Check if duplication function is required for taskloops.
4846static bool checkInitIsRequired(CodeGenFunction &CGF,
4847 ArrayRef<PrivateDataTy> Privates) {
4848 bool InitRequired = false;
4849 for (const PrivateDataTy &Pair : Privates) {
4850 const VarDecl *VD = Pair.second.PrivateCopy;
4851 const Expr *Init = VD->getAnyInitializer();
4852 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4853 !CGF.isTrivialInitializer(Init));
4854 if (InitRequired)
4855 break;
4856 }
4857 return InitRequired;
4858}
4859

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameters: destination task, source task, lastprivate flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // TDBase points at the *destination* task descriptor.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // NOTE: this inner TDBase deliberately shadows the outer one — it points
    // at the *source* task, whose shareds block feeds the firstprivate
    // copies below.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  // Run the (non-trivial) initializers into the destination task.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4939
4940/// Checks if destructor function is required to be generated.
4941/// \return true if cleanups are required, false otherwise.
4942static bool
4943checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4944 bool NeedsCleanup = false;
4945 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4946 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4947 for (const FieldDecl *FD : PrivateRD->fields()) {
4948 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4949 if (NeedsCleanup)
4950 break;
4951 }
4952 return NeedsCleanup;
4953}
4954
/// Build the kmp_task_t record for a task-based directive, allocate it via
/// __kmpc_omp_task_alloc, copy shareds, initialize privates, and emit the
/// helper thunks (proxy entry, privates map, optional dup and destructor).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  // Firstprivates additionally carry the element-init placeholder decl.
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Descending alignment order minimizes padding in the privates record;
  // stable_sort keeps the clause order for equal alignments.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use an
  // extended record (bounds/stride/last-iter fields), cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo(DefaultAS);
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The map function type is taken from the 4th parameter of TaskFunction.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The final flag may be a runtime value (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
                              getThreadID(CGF, Loc), TaskFlags,
                              KmpTaskTWithPrivatesTySize, SharedsSize,
                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                  TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may re-run initializers per chunk; emit a task_dup function
    // when lastprivates or non-trivial constructor inits are present.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5158
/// Emit a '#pragma omp task' call: allocate/initialize the task via
/// emitTaskInit, materialize the dependence array (if any), then either
/// enqueue the task (__kmpc_omp_task[_with_deps]) or, under a false 'if'
/// clause, execute it immediately between begin_if0/complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build (once) the record type kmp_depend_info { intptr base; size len;
    // flags }; cached in KmpDependInfoTy across calls.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: length = (&upper_bound + 1) - &lower_bound in bytes.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a void* pointer at its first element for the RTL.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch ('if' clause true or absent): enqueue the task.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Else-branch ('if' clause false): run the task body inline in the
  // encountering thread (undeferred task).
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5353
/// Emits a call to void __kmpc_taskloop(...) for a 'taskloop' directive.
/// The task descriptor is allocated and initialized by emitTaskInit(); this
/// routine then stores the loop bounds, stride and the reductions pointer into
/// the kmp_task_t object before issuing the runtime call.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if_val' runtime argument: the evaluated 'if' clause condition widened to
  // int, or constant 1 when no 'if' clause is present.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the 'lb' (lower bound) field of the task descriptor from the
  // captured lower-bound variable's initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the 'ub' (upper bound) field likewise.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the 'st' (stride) field likewise.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reduction clause: null out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // 'sched': Data.Schedule's int flag selects num_tasks vs grainsize.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // 'grainsize'/'num_tasks' value, or 0 when no schedule clause was given.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // 'task_dup' routine, or null if no duplication function was generated.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5434
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen on every element (used by the atomic-reduction codegen).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element across iterations;
  // the back-edge incoming values are added after the loop body is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so that RedOpGen
  // (which references the whole variables) operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5514
5515/// Emit reduction combiner. If the combiner is a simple expression emit it as
5516/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5517/// UDR combiner function.
5518static void emitReductionCombiner(CodeGenFunction &CGF,
5519 const Expr *ReductionOp) {
5520 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5521 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5522 if (const auto *DRE =
5523 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5524 if (const auto *DRD =
5525 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5526 std::pair<llvm::Function *, llvm::Function *> Reduction =
5527 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5528 RValue Func = RValue::get(Reduction.first);
5529 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5530 CGF.EmitIgnoredExpr(ReductionOp);
5531 return;
5532 }
5533 CGF.EmitIgnoredExpr(ReductionOp);
5534}
5535
/// Emits the outlined reduce_func passed to __kmpc_reduce{_nowait}:
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of void* pointers to the individual
/// reduction items; each item is combined with the corresponding op from
/// \p ReductionOps.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Remap every LHS/RHS variable to the corresponding slot of the argument
  // arrays so the combiner expressions below read/write the right items.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // For VLAs the caller stored the element count in the next array slot;
      // load it and map the VLA size expression before emitting the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5627
5628void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5629 const Expr *ReductionOp,
5630 const Expr *PrivateRef,
5631 const DeclRefExpr *LHS,
5632 const DeclRefExpr *RHS) {
5633 if (PrivateRef->getType()->isArrayType()) {
5634 // Emit reduction for array section.
5635 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5636 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5637 EmitOMPAggregateReduction(
5638 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5639 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5640 emitReductionCombiner(CGF, ReductionOp);
5641 });
5642 } else {
5643 // Emit reduction for array subscript or single variable.
5644 emitReductionCombiner(CGF, ReductionOp);
5645 }
5646}
5647
/// Emits codegen for a 'reduction' clause: either a simple sequential combine
/// (Options.SimpleReduction), or the full __kmpc_reduce{_nowait} protocol with
/// a switch over the runtime's answer (case 1: plain combine under the
/// runtime lock; case 2: atomic/critical combine).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime interaction needed: apply each combiner in sequence.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  // Use the target's default address space for the RedList pointer type.
  unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(DefaultAS), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // Opcode of the update operation; BO_Comma means no suitable simple
      // atomic form was found (fall back to a critical region below).
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the inner 'BO' below shadows the outer opcode variable.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Materialize the loaded value of X into a temporary mapped
                // to VD so UpExpr reads the atomic load's result.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5952
5953/// Generates unique name for artificial threadprivate variables.
5954/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5955static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5956 const Expr *Ref) {
5957 SmallString<256> Buffer;
5958 llvm::raw_svector_ostream Out(Buffer);
5959 const clang::DeclRefExpr *DE;
5960 const VarDecl *D = ::getBaseDecl(Ref, DE);
5961 if (!D)
5962 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5963 D = D->getCanonicalDecl();
5964 std::string Name = CGM.getOpenMPRuntime().getName(
5965 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5966 Out << Prefix << Name << "_"
5967 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5968 return Out.str();
5969}
5970
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Single void* parameter: pointer to the private reduction item.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Initializer does not need the shared item; pass a null lvalue.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6037
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // Two void* parameters: in/out item and in item.
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6115
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single void* parameter: pointer to the private reduction item.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}
6164
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit if codegen is disabled or there are no reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // Local array holding one descriptor per reduction item:
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill in one array element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_size = <size in chars>; SizeVal is non-null only when
    // the size has a runtime component (VLAs/array sections).
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; null when the item needs no cleanups.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 1 for delayed creation, 0 otherwise.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6269
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the reduction item is
  // non-constant, i.e. Sizes.second (the runtime size expression) is non-null.
  // The initializer/combiner/finalizer functions read the size back from this
  // threadprivate variable, since the runtime cannot pass it to them.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}
6296
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  // The runtime returns the address of the thread-specific copy of the item;
  // the shared item's alignment is reused for the returned address.
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}
6314
6315void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6316 SourceLocation Loc) {
6317 if (!CGF.HaveInsertPoint())
6318 return;
6319 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6320 // global_tid);
6321 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6322 // Ignore return result until untied tasks are supported.
6323 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6324 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6325 Region->emitUntiedSwitch(CGF);
6326}
6327
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emit the region body inline in the current function. The RAII object
  // temporarily installs captured-statement info for the inlined region and
  // restores the previous info on scope exit.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6337
namespace {
/// Cancellation-kind values passed as the kmp_int32 cncl_kind argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls emitted below.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation kind requested
  CancelParallel = 1,  // cancellation of a 'parallel' region
  CancelLoop = 2,      // cancellation of a worksharing loop ('for')
  CancelSections = 3,  // cancellation of a 'sections' region
  CancelTaskgroup = 4  // cancellation of a 'taskgroup' region
};
} // anonymous namespace
6347
6348static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6349 RTCancelKind CancelKind = CancelNoreq;
6350 if (CancelRegion == OMPD_parallel)
6351 CancelKind = CancelParallel;
6352 else if (CancelRegion == OMPD_for)
6353 CancelKind = CancelLoop;
6354 else if (CancelRegion == OMPD_sections)
6355 CancelKind = CancelSections;
6356 else {
6357 assert(CancelRegion == OMPD_taskgroup);
6358 CancelKind = CancelTaskgroup;
6359 }
6360 return CancelKind;
6361}
6362
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Exit from construct: branch through pending cleanups to the
      // cancellation destination of the enclosing directive.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6397
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Codegen for the cancel itself; emitted unconditionally, or only on the
    // 'then' branch when an if clause is present.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Exit from construct: branch through pending cleanups to the
      // cancellation destination of the enclosing directive.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the if clause; the else branch is a no-op.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6439
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Record that this module has emitted at least one target region before
  // delegating the actual outlining to the helper.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6449
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement of the target region into a function with
  // the entry name computed above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host, a unique zero-initialized i8 global serves as the ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6516
6517/// Checks if the expression is constant or does not have non-trivial function
6518/// calls.
6519static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6520 // We can skip constant expressions.
6521 // We can skip expressions with trivial calls or simple expressions.
6522 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6523 !E->hasNonTrivialCall(Ctx)) &&
6524 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6525}
6526
/// Looks through compound statements in \p Body, skipping statements the
/// analysis below treats as ignorable (trivial expressions, asm/null
/// statements, standalone flush/barrier/taskyield directives, and trivial
/// declarations), and returns the single remaining child statement — or
/// nullptr if there is more than one.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // These declaration kinds are always considered ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // A variable is ignorable if it is constexpr, or if it has a
              // trivial (or reference) type and a trivial/no initializer.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the unique child, looking through containers again.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6571
/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the directive nested directly inside it.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Nested teams with an explicit num_teams clause: emit it.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*IsSigned=*/true);
        }
        // Nested teams without num_teams: emit 0.
        return Bld.getInt32(0);
      }
      // Nested parallel/simd (no teams): a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be identified.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause (if any) is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*IsSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Target directives with no teams construct: a single team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based executable directives
  // and must not reach this function (guarded by the assert above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6692
/// Computes the number of threads implied by a 'parallel' (or 'simd')
/// directive nested directly inside the captured statement \p CS, clamped to
/// \p DefaultThreadLimitVal if that is non-null. Returns
/// \p DefaultThreadLimitVal (possibly nullptr) when no nested parallel
/// directive constrains the thread count, and 0 ("runtime default") when
/// neither a limit nor a nested directive is found.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the if clause that applies to 'parallel' (unmodified or with
        // the 'parallel' name modifier).
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          // If the condition folds to false, the parallel region runs with
          // exactly one thread.
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause depends on.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any pre-init declarations the clause depends on.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*IsSigned=*/false);
        // Clamp num_threads to the default thread limit, if one is in effect:
        // min(DefaultThreadLimitVal, NumThreads) via unsigned compare.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region runs with one thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6784
6785/// Emit the number of threads for a target directive. Inspect the
6786/// thread_limit clause associated with a teams construct combined or closely
6787/// nested with the target directive.
6788///
6789/// Emit the num_threads clause for directives such as 'target parallel' that
6790/// have no associated teams construct.
6791///
6792/// Otherwise, return nullptr.
6793static llvm::Value *
6794emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6795 const OMPExecutableDirective &D) {
6796 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6797 "Clauses associated with the teams directive expected to be emitted "
6798 "only for the host!");
6799 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6800 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6801 "Expected target-based executable directive.");
6802 CGBuilderTy &Bld = CGF.Builder;
6803 llvm::Value *ThreadLimitVal = nullptr;
6804 llvm::Value *NumThreadsVal = nullptr;
6805 switch (DirectiveKind) {
6806 case OMPD_target: {
6807 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6808 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6809 return NumThreads;
6810 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6811 CGF.getContext(), CS->getCapturedStmt());
6812 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6813 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6814 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6815 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6816 const auto *ThreadLimitClause =
6817 Dir->getSingleClause<OMPThreadLimitClause>();
6818 CodeGenFunction::LexicalScope Scope(
6819 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6820 if (const auto *PreInit =
6821 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6822 for (const auto *I : PreInit->decls()) {
6823 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6824 CGF.EmitVarDecl(cast<VarDecl>(*I));
6825 } else {
6826 CodeGenFunction::AutoVarEmission Emission =
6827 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6828 CGF.EmitAutoVarCleanups(Emission);
6829 }
6830 }
6831 }
6832 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6833 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6834 ThreadLimitVal =
6835 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6836 }
6837 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6838 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6839 CS = Dir->getInnermostCapturedStmt();
6840 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6841 CGF.getContext(), CS->getCapturedStmt());
6842 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6843 }
6844 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6845 !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6846 CS = Dir->getInnermostCapturedStmt();
6847 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6848 return NumThreads;
6849 }
6850 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6851 return Bld.getInt32(1);
6852 }
6853 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6854 }
6855 case OMPD_target_teams: {
6856 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6857 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6858 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6859 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6860 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6861 ThreadLimitVal =
6862 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6863 }
6864 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6865 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6866 return NumThreads;
6867 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6868 CGF.getContext(), CS->getCapturedStmt());
6869 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6870 if (Dir->getDirectiveKind() == OMPD_distribute) {
6871 CS = Dir->getInnermostCapturedStmt();
6872 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6873 return NumThreads;
6874 }
6875 }
6876 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6877 }
6878 case OMPD_target_teams_distribute:
6879 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6880 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6881 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6882 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6883 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6884 ThreadLimitVal =
6885 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6886 }
6887 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6888 case OMPD_target_parallel:
6889 case OMPD_target_parallel_for:
6890 case OMPD_target_parallel_for_simd:
6891 case OMPD_target_teams_distribute_parallel_for:
6892 case OMPD_target_teams_distribute_parallel_for_simd: {
6893 llvm::Value *CondVal = nullptr;
6894 // Handle if clause. If if clause present, the number of threads is
6895 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6896 if (D.hasClausesOfKind<OMPIfClause>()) {
6897 const OMPIfClause *IfClause = nullptr;
6898 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6899 if (C->getNameModifier() == OMPD_unknown ||
6900 C->getNameModifier() == OMPD_parallel) {
6901 IfClause = C;
6902 break;
6903 }
6904 }
6905 if (IfClause) {
6906 const Expr *Cond = IfClause->getCondition();
6907 bool Result;
6908 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6909 if (!Result)
6910 return Bld.getInt32(1);
6911 } else {
6912 CodeGenFunction::RunCleanupsScope Scope(CGF);
6913 CondVal = CGF.EvaluateExprAsBool(Cond);
6914 }
6915 }
6916 }
6917 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6918 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6919 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6920 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6921 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6922 ThreadLimitVal =
6923 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
6924 }
6925 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6926 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6927 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6928 llvm::Value *NumThreads = CGF.EmitScalarExpr(
6929 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6930 NumThreadsVal =
6931 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false);
6932 ThreadLimitVal = ThreadLimitVal
6933 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6934 ThreadLimitVal),
6935 NumThreadsVal, ThreadLimitVal)
6936 : NumThreadsVal;
6937 }
6938 if (!ThreadLimitVal)
6939 ThreadLimitVal = Bld.getInt32(0);
6940 if (CondVal)
6941 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6942 return ThreadLimitVal;
6943 }
6944 case OMPD_target_teams_distribute_simd:
6945 case OMPD_target_simd:
6946 return Bld.getInt32(1);
6947 case OMPD_parallel:
6948 case OMPD_for:
6949 case OMPD_parallel_for:
6950 case OMPD_parallel_sections:
6951 case OMPD_for_simd:
6952 case OMPD_parallel_for_simd:
6953 case OMPD_cancel:
6954 case OMPD_cancellation_point:
6955 case OMPD_ordered:
6956 case OMPD_threadprivate:
6957 case OMPD_allocate:
6958 case OMPD_task:
6959 case OMPD_simd:
6960 case OMPD_sections:
6961 case OMPD_section:
6962 case OMPD_single:
6963 case OMPD_master:
6964 case OMPD_critical:
6965 case OMPD_taskyield:
6966 case OMPD_barrier:
6967 case OMPD_taskwait:
6968 case OMPD_taskgroup:
6969 case OMPD_atomic:
6970 case OMPD_flush:
6971 case OMPD_teams:
6972 case OMPD_target_data:
6973 case OMPD_target_exit_data:
6974 case OMPD_target_enter_data:
6975 case OMPD_distribute:
6976 case OMPD_distribute_simd:
6977 case OMPD_distribute_parallel_for:
6978 case OMPD_distribute_parallel_for_simd:
6979 case OMPD_teams_distribute:
6980 case OMPD_teams_distribute_simd:
6981 case OMPD_teams_distribute_parallel_for:
6982 case OMPD_teams_distribute_parallel_for_simd:
6983 case OMPD_target_update:
6984 case OMPD_declare_simd:
6985 case OMPD_declare_target:
6986 case OMPD_end_declare_target:
6987 case OMPD_declare_reduction:
6988 case OMPD_declare_mapper:
6989 case OMPD_taskloop:
6990 case OMPD_taskloop_simd:
6991 case OMPD_requires:
6992 case OMPD_unknown:
6993 break;
6994 }
6995 llvm_unreachable("Unsupported directive kind.");
6996}
6997
6998namespace {
6999LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7000
7001// Utility to handle information from clauses associated with a given
7002// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7003// It provides a convenient interface to obtain the information and generate
7004// code for that information.
7005class MappableExprsHandler {
7006public:
7007 /// Values for bit flags used to specify the mapping type for
7008 /// offloading.
7009 enum OpenMPOffloadMappingFlags : uint64_t {
7010 /// No flags
7011 OMP_MAP_NONE = 0x0,
7012 /// Allocate memory on the device and move data from host to device.
7013 OMP_MAP_TO = 0x01,
7014 /// Allocate memory on the device and move data from device to host.
7015 OMP_MAP_FROM = 0x02,
7016 /// Always perform the requested mapping action on the element, even
7017 /// if it was already mapped before.
7018 OMP_MAP_ALWAYS = 0x04,
7019 /// Delete the element from the device environment, ignoring the
7020 /// current reference count associated with the element.
7021 OMP_MAP_DELETE = 0x08,
7022 /// The element being mapped is a pointer-pointee pair; both the
7023 /// pointer and the pointee should be mapped.
7024 OMP_MAP_PTR_AND_OBJ = 0x10,
7025 /// This flags signals that the base address of an entry should be
7026 /// passed to the target kernel as an argument.
7027 OMP_MAP_TARGET_PARAM = 0x20,
7028 /// Signal that the runtime library has to return the device pointer
7029 /// in the current position for the data being mapped. Used when we have the
7030 /// use_device_ptr clause.
7031 OMP_MAP_RETURN_PARAM = 0x40,
7032 /// This flag signals that the reference being passed is a pointer to
7033 /// private data.
7034 OMP_MAP_PRIVATE = 0x80,
7035 /// Pass the element to the device by value.
7036 OMP_MAP_LITERAL = 0x100,
7037 /// Implicit map
7038 OMP_MAP_IMPLICIT = 0x200,
7039 /// The 16 MSBs of the flags indicate whether the entry is member of some
7040 /// struct/class.
7041 OMP_MAP_MEMBER_OF = 0xffff000000000000,
7042 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7043 };
7044
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Access the base pointer itself.
    llvm::Value *operator*() const { return Ptr; }
    /// Declaration whose device pointer must be returned for this entry, or
    /// null if there is none.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7061
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped member recorded so far.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped member recorded so far.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the whole struct; stays invalid until a member is recorded.
    Address Base = Address::invalid();
  };
7077
private:
  /// Information extracted from a single map-clause component list: the
  /// components themselves, the map type and modifiers, whether the runtime
  /// must return a device pointer for this entry (use_device_ptr), and
  /// whether the map was generated implicitly.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7096
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression denoting the deferred pointer component.
    const Expr *IE = nullptr;
    /// Declaration the use_device_ptr clause refers to.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7107
  /// Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// Consulted when choosing adjusted map flags for captures (see
  /// getMapModifiersForPrivateClauses).
  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7123
7124 llvm::Value *getExprTypeSize(const Expr *E) const {
7125 QualType ExprTy = E->getType().getCanonicalType();
7126
7127 // Reference types are ignored for mapping purposes.
7128 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7129 ExprTy = RefTy->getPointeeType().getCanonicalType();
7130
7131 // Given that an array section is considered a built-in type, we need to
7132 // do the calculation based on the length of the section instead of relying
7133 // on CGF.getTypeSize(E->getType()).
7134 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7135 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7136 OAE->getBase()->IgnoreParenImpCasts())
7137 .getCanonicalType();
7138
7139 // If there is no length associated with the expression, that means we
7140 // are using the whole length of the base.
7141 if (!OAE->getLength() && OAE->getColonLoc().isValid())
7142 return CGF.getTypeSize(BaseTy);
7143
7144 llvm::Value *ElemSize;
7145 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7146 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7147 } else {
7148 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7149 assert(ATy && "Expecting array type if not a pointer type.");
7150 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7151 }
7152
7153 // If we don't have a length at this point, that is because we have an
7154 // array section with a single element.
7155 if (!OAE->getLength())
7156 return ElemSize;
7157
7158 llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
7159 LengthVal =
7160 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
7161 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7162 }
7163 return CGF.getTypeSize(ExprTy);
7164 }
7165
7166 /// Return the corresponding bits for a given map clause modifier. Add
7167 /// a flag marking the map as a pointer if requested. Add a flag marking the
7168 /// map as the first one of a series of maps that relate to the same map
7169 /// expression.
7170 OpenMPOffloadMappingFlags getMapTypeBits(
7171 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7172 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7173 OpenMPOffloadMappingFlags Bits =
7174 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7175 switch (MapType) {
7176 case OMPC_MAP_alloc:
7177 case OMPC_MAP_release:
7178 // alloc and release is the default behavior in the runtime library, i.e.
7179 // if we don't pass any bits alloc/release that is what the runtime is
7180 // going to do. Therefore, we don't need to signal anything for these two
7181 // type modifiers.
7182 break;
7183 case OMPC_MAP_to:
7184 Bits |= OMP_MAP_TO;
7185 break;
7186 case OMPC_MAP_from:
7187 Bits |= OMP_MAP_FROM;
7188 break;
7189 case OMPC_MAP_tofrom:
7190 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7191 break;
7192 case OMPC_MAP_delete:
7193 Bits |= OMP_MAP_DELETE;
7194 break;
7195 case OMPC_MAP_unknown:
7196 llvm_unreachable("Unexpected map type!");
7197 }
7198 if (AddPtrFlag)
7199 Bits |= OMP_MAP_PTR_AND_OBJ;
7200 if (AddIsTargetParamFlag)
7201 Bits |= OMP_MAP_TARGET_PARAM;
7202 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7203 != MapModifiers.end())
7204 Bits |= OMP_MAP_ALWAYS;
7205 return Bits;
7206 }
7207
7208 /// Return true if the provided expression is a final array section. A
7209 /// final array section, is one whose length can't be proved to be one.
7210 bool isFinalArraySectionExpression(const Expr *E) const {
7211 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7212
7213 // It is not an array section and therefore not a unity-size one.
7214 if (!OASE)
7215 return false;
7216
7217 // An array section with no colon always refer to a single element.
7218 if (OASE->getColonLoc().isInvalid())
7219 return false;
7220
7221 const Expr *Length = OASE->getLength();
7222
7223 // If we don't have a length we have to check if the array has size 1
7224 // for this dimension. Also, we should always expect a length if the
7225 // base type is pointer.
7226 if (!Length) {
7227 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7228 OASE->getBase()->IgnoreParenImpCasts())
7229 .getCanonicalType();
7230 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7231 return ATy->getSize().getSExtValue() != 1;
7232 // If we don't have a constant dimension length, we have to consider
7233 // the current section as having any size, so it is not necessarily
7234 // unitary. If it happen to be unity size, that's user fault.
7235 return true;
7236 }
7237
7238 // Check if the length evaluates to 1.
7239 Expr::EvalResult Result;
7240 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7241 return true; // Can have more that size 1.
7242
7243 llvm::APSInt ConstLength = Result.Val.getInt();
7244 return ConstLength.getSExtValue() != 1;
7245 }
7246
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  /// \a OverlappedElements, when non-empty, lists the component lists that
  /// overlap this one; the non-overlapped pieces of the base are then emitted
  /// as separate MEMBER_OF entries and processing stops at the base element.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    bool IsLink = false; // Is this variable a "declare target link"?

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
          if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
            // "declare target link" variables are mapped through the runtime
            // link pointer rather than the host variable itself.
            IsLink = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
          }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Walk the remaining components; an entry is emitted for the last
    // component and for every pointer or final array section on the way.
    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements. Each gap
          // between overlapped regions becomes its own entry; the size is the
          // byte distance from the current lower bound to the start of the
          // next overlapped element.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(Size);
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing region, from the end of the last overlapped
          // element up to the end of the struct.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(Size);
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(Size);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7673
7674 /// Return the adjusted map modifiers if the declaration a capture refers to
7675 /// appears in a first-private clause. This is expected to be used only with
7676 /// directives that start with 'target'.
7677 MappableExprsHandler::OpenMPOffloadMappingFlags
7678 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7679 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7680
7681 // A first private variable captured by reference will use only the
7682 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7683 // declaration is known as first-private in this handler.
7684 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7685 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7686 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7687 return MappableExprsHandler::OMP_MAP_ALWAYS |
7688 MappableExprsHandler::OMP_MAP_TO;
7689 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7690 return MappableExprsHandler::OMP_MAP_TO |
7691 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7692 return MappableExprsHandler::OMP_MAP_PRIVATE |
7693 MappableExprsHandler::OMP_MAP_TO;
7694 }
7695 return MappableExprsHandler::OMP_MAP_TO |
7696 MappableExprsHandler::OMP_MAP_FROM;
7697 }
7698
7699 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7700 // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7701 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7702 << 48);
7703 }
7704
7705 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7706 OpenMPOffloadMappingFlags MemberOfFlag) {
7707 // If the entry is PTR_AND_OBJ but has not been marked with the special
7708 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7709 // marked as MEMBER_OF.
7710 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7711 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7712 return;
7713
7714 // Reset the placeholder value to prepare the flag for the assignment of the
7715 // proper MEMBER_OF value.
7716 Flags &= ~OMP_MAP_MEMBER_OF;
7717 Flags |= MemberOfFlag;
7718 }
7719
  /// Collect, in \p Layout, the non-bitfield fields of \p RD (including the
  /// fields of its non-virtual and virtual bases) in the order in which they
  /// appear in the LLVM IR struct type for the record. \p AsBase selects the
  /// base-subobject LLVM type instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot holds either a base class
    // or a field, or stays null (e.g. padding or bitfield storage).
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Skip a virtual base whose slot was already taken by a non-virtual
      // base placed above.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField()) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the layout in LLVM element order, recursing into base classes so
    // their fields appear before the derived class's own fields.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
7779
7780public:
7781 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7782 : CurDir(Dir), CGF(CGF) {
7783 // Extract firstprivate clause information.
7784 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7785 for (const auto *D : C->varlists())
7786 FirstPrivateDecls.insert(
7787 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
7788 // Extract device pointer clause information.
7789 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7790 for (auto L : C->component_lists())
7791 DevPointersMap[L.first].push_back(L.second);
7792 }
7793
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    // Compare as void* so the pointer difference is in bytes.
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // above is the argument passed to the runtime, not the member entries.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
7828
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Keys on the canonical declaration; 'this' captures key on
    // null.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // Collect component lists from map, to and from clauses. 'to'/'from'
    // clauses are recorded with the corresponding map type and no modifiers.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C :
        this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit the zero-size entry right away, using
          // the loaded pointer value for both base and section pointer.
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Emit the collected information, one declaration (chunk) at a time.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8008
  /// Emit capture info for lambdas for variables captured by reference.
  /// For the 'this' capture and each by-reference capture of the lambda
  /// record behind \p VD / \p Arg, push a PTR_AND_OBJ entry and record the
  /// lambda object's address in \p LambdaPointers so MEMBER_OF can be fixed
  /// up later by adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda records need this treatment.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this': base is the capture field's address, pointer
      // is the captured value.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      // Only by-reference captures need a pointer mapping.
      if (LC.getCaptureKind() != LCK_ByRef)
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarLValVal.getPointer());
      Sizes.push_back(CGF.getTypeSize(
          VD->getType().getCanonicalType().getNonReferenceType()));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8055
8056 /// Set correct indices for lambdas captures.
8057 void adjustMemberOfForLambdaCaptures(
8058 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8059 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8060 MapFlagsArrayTy &Types) const {
8061 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8062 // Set correct member_of idx for all implicit lambda captures.
8063 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8064 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8065 continue;
8066 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8067 assert(BasePtr && "Unable to find base lambda address.");
8068 int TgtIdx = -1;
8069 for (unsigned J = I; J > 0; --J) {
8070 unsigned Idx = J - 1;
8071 if (Pointers[Idx] != BasePtr)
8072 continue;
8073 TgtIdx = Idx;
8074 break;
8075 }
8076 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8077 // All other current entries will be MEMBER_OF the combined entry
8078 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8079 // 0xFFFF in the MEMBER_OF field).
8080 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8081 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8082 }
8083 }
8084
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture \p Cap whose runtime value is \p Arg.
  /// Overlapping component lists for the same declaration are detected and
  /// handed to generateInfoForComponentList so the overlapped regions are
  /// mapped only once.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A 'this' capture maps to the null declaration; otherwise use the
    // canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Gather every component list from map clauses that refers to this
    // declaration, together with its map type, modifiers and implicitness.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Two lists overlap when one is a prefix of the other (compared from the
    // base expression outwards); the shorter list is the base, the longer one
    // an overlapped sub-component.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // NOTE(review): this std::tie reuses MapType/MapModifiers/IsImplicit
        // and so clobbers the values unpacked from L above; only the
        // component lists are compared below, so this looks intentional but
        // is worth confirming.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item: order sub-components by
    // the declaration order of the fields they touch, using the record's
    // plain layout to compare fields from different (base) classes.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise compare the first differing fields by their position
            // in the record layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Emit entries for component lists that have overlapped sub-components,
    // passing the sorted overlap information along.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8258
8259 /// Generate the base pointers, section pointers, sizes and map types
8260 /// associated with the declare target link variables.
8261 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8262 MapValuesArrayTy &Pointers,
8263 MapValuesArrayTy &Sizes,
8264 MapFlagsArrayTy &Types) const {
8265 // Map other list items in the map clause which are not captured variables
8266 // but "declare target link" global variables.
8267 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8268 for (const auto &L : C->component_lists()) {
8269 if (!L.first)
8270 continue;
8271 const auto *VD = dyn_cast<VarDecl>(L.first);
8272 if (!VD)
8273 continue;
8274 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8275 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8276 if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8277 continue;
8278 StructRangeInfoTy PartialStruct;
8279 generateInfoForComponentList(
8280 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8281 Pointers, Sizes, Types, PartialStruct,
8282 /*IsFirstComponentList=*/true, C->isImplicit());
8283 assert(!PartialStruct.Base.isValid() &&
8284 "No partial structs for declare target link expected.");
8285 }
8286 }
8287 }
8288
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV. Exactly one
  /// entry (base pointer, pointer, size, flags) is appended per call.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is mapped as the pointee object with the default 'tofrom'.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.getTypeSize(RI.getType()));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
      }
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.getTypeSize(ElementType));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      if (FirstPrivateDecls.count(VD) &&
          VD->getType().isConstant(CGF.getContext())) {
        // A constant firstprivate is materialized as a global copy on the
        // target; copy the current value into it and map the global instead.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*isVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (FirstPrivateDecls.count(VD) && ElementType->isAnyPointerType()) {
          // A firstprivate pointer: the section pointer is the pointee, so
          // load it through the reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8359};
8360
/// Device IDs reserved for use with the offloading runtime library.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
8366} // anonymous namespace
8367
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference. Base pointers and pointers are always filled
/// at runtime; sizes use a constant global when all sizes are compile-time
/// constants, and map types always use a constant global.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store each base pointer, pointer and (if needed) size into its slot.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // NOTE(review): AddrSpace 0 is hard-coded here; see the XXXAR TODO at
      // the top of this file about default address spaces.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the slot address for entries that must return a device
      // pointer to the caller.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8484/// Emit the arguments to be passed to the runtime library based on the
8485/// arrays of pointers, sizes and map types.
8486static void emitOffloadingArraysArgument(
8487 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8488 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8489 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8490 CodeGenModule &CGM = CGF.CGM;
8491 if (Info.NumberOfPtrs) {
8492 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8493 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8494 Info.BasePointersArray,
8495 /*Idx0=*/0, /*Idx1=*/0);
8496 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8497 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8498 Info.PointersArray,
8499 /*Idx0=*/0,
8500 /*Idx1=*/0);
8501 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8502 llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
8503 /*Idx0=*/0, /*Idx1=*/0);
8504 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8505 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8506 Info.MapTypesArray,
8507 /*Idx0=*/0,
8508 /*Idx1=*/0);
8509 } else {
8510 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8511 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8512 unsigned DefaultAS = CGM.getTargetCodeGenInfo().getDefaultAS();
8513 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo(DefaultAS));
8514 MapTypesArrayArg =
8515 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo(DefaultAS));
8516 }
8517}
8518
/// Check for inner distribute directive.
///
/// Given a target-based directive \p D, look through its (single) captured
/// body for a nested 'distribute'-kind directive and return it, or null if
/// there is none. For a plain 'target' the search descends one extra level
/// through an intervening 'teams' directive. Combined target directives that
/// already fix their schedule ('target parallel', 'target simd', ...) never
/// have a relevant nested distribute and return null; all other kinds cannot
/// legally reach here.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Strip the captured-statement wrappers and containers around the body and
  // reduce it to the single meaningful child statement, if there is one.
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may wrap 'distribute' directly, or wrap 'teams' which in
      // turn wraps 'distribute' — peel one more level in the latter case.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams' may wrap 'distribute' directly.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // Combined forms without a teams component cannot contain a nested
      // distribute that matters here.
      return nullptr;
    // Every remaining directive kind is either already a combined
    // target-teams-distribute form or not a target directive at all, so it
    // must never be passed to this helper.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
8614
/// Emit a call to __kmpc_push_target_tripcount(device_id, trip_count) for a
/// target region that contains (or is) a teams-distribute loop, so the
/// runtime knows the loop trip count before the region is launched. Does
/// nothing when no such loop directive can be found.
///
/// \param Device expression of the 'device' clause, or null (then
///        OMP_DEVICEID_UNDEF is passed).
/// \param SizeEmitter callback that computes the number of iterations of the
///        discovered loop directive.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
    const llvm::function_ref<llvm::Value *(
        CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. D itself is only
  // usable directly when it is a combined teams+distribute directive.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Compute the loop trip count via the caller-supplied callback.
    llvm::Value *NumIterations = SizeEmitter(CGF, *LD);

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

    llvm::Value *Args[] = {DeviceID, NumIterations};
    CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
8645
/// Emit the host-side launch code for a target directive: fill the
/// offloading argument arrays, call the appropriate __tgt_target* entry
/// point, and fall back to executing the host (outlined) version when
/// offloading fails, when no device binary exists (null \p OutlinedFnID), or
/// when the 'if' clause evaluates to false.
///
/// \param OutlinedFn host version of the target region body.
/// \param OutlinedFnID unique ID identifying the region to the runtime; may
///        be null when no offloading is possible.
/// \param IfCond condition of the 'if' clause, or null.
/// \param Device expression of the 'device' clause, or null.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause forces the launch to be wrapped in an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the values of the variables captured by the target region.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray are filled by TargetThenGen below and consumed by
  // ThenGen (possibly from within a generated task), hence the by-reference
  // lambda captures.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A nonzero return from __tgt_target* means offloading failed.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside a task region the captures must be re-materialized in the
      // task's own function context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      // Re-materialize captures inside the task's function context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Prepares the offloading arrays, then runs ThenGen (directly or wrapped in
  // a task when a 'depend' clause is present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk captures, record fields and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.getTypeSize(RI->getType()));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the results to the enclosing scope for ThenGen to consume.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-fallback path, likewise optionally wrapped in a task.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
8916
/// Recursively scan \p S for OpenMP target execution directives and emit a
/// device function for each target region found. \p ParentName (the mangled
/// name of the enclosing function) is used to build the unique offload entry
/// name for each region.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Location triple (device, file, line) uniquely identifies the region.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the dedicated device-function emitter for each combined
    // target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining kinds are not target execution directives, so the
    // RequiresDeviceCodegen guard above should have filtered them out.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // Non-target directive: recurse into its associated statement only.
    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9054
9055bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9056 // If emitting code for the host, we do not process FD here. Instead we do
9057 // the normal code generation.
9058 if (!CGM.getLangOpts().OpenMPIsDevice)
9059 return false;
9060
9061 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9062 StringRef Name = CGM.getMangledName(GD);
9063 // Try to detect target regions in the function.
9064 if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9065 scanForTargetRegionsFunctions(FD->getBody(), Name);
9066
9067 // Do not to emit function if it is not marked as declare target.
9068 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9069 AlreadyEmittedTargetFunctions.count(Name) == 0;
9070}
9071
9072bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9073 if (!CGM.getLangOpts().OpenMPIsDevice)
9074 return false;
9075
9076 // Check if there are Ctors/Dtors in this declaration and look for target
9077 // regions in it. We use the complete variant to produce the kernel name
9078 // mangling.
9079 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9080 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9081 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9082 StringRef ParentName =
9083 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9084 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9085 }
9086 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9087 StringRef ParentName =
9088 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9089 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9090 }
9091 }
9092
9093 // Do not to emit variable if it is not marked as declare target.
9094 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9095 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9096 cast<VarDecl>(GD.getDecl()));
9097 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
9098 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9099 return true;
9100 }
9101 return false;
9102}
9103
/// Create (or reuse) an internal global holding the constant firstprivate
/// variable \p VD for use in target regions and register it as a device
/// global-var offload entry so host and device agree on its name.
///
/// \pre \p VD has a constant-qualified type.
/// \return the address of the internal global.
llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    // Build a name unique per (device, file, variable, line) so distinct
    // translation units cannot collide.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  // Keep the global alive through optimization even though nothing in IR
  // references it directly yet.
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}
9137
9138void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9139 llvm::Constant *Addr) {
9140 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9141 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9142 if (!Res) {
9143 if (CGM.getLangOpts().OpenMPIsDevice) {
9144 // Register non-target variables being emitted in device code (debug info
9145 // may cause this).
9146 StringRef VarName = CGM.getMangledName(VD);
9147 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9148 }
9149 return;
9150 }
9151 // Register declare target variables.
9152 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9153 StringRef VarName;
9154 CharUnits VarSize;
9155 llvm::GlobalValue::LinkageTypes Linkage;
9156 switch (*Res) {
9157 case OMPDeclareTargetDeclAttr::MT_To:
9158 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9159 VarName = CGM.getMangledName(VD);
9160 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9161 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9162 assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9163 } else {
9164 VarSize = CharUnits::Zero();
9165 }
9166 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9167 // Temp solution to prevent optimizations of the internal variables.
9168 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9169 std::string RefName = getName({VarName, "ref"});
9170 if (!CGM.GetGlobalValue(RefName)) {
9171 llvm::Constant *AddrRef =
9172 getOrCreateInternalVariable(Addr->getType(), RefName);
9173 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9174 GVAddrRef->setConstant(/*Val=*/true);
9175 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9176 GVAddrRef->setInitializer(Addr);
9177 CGM.addCompilerUsedGlobal(GVAddrRef);
9178 }
9179 }
9180 break;
9181 case OMPDeclareTargetDeclAttr::MT_Link:
9182 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9183 if (CGM.getLangOpts().OpenMPIsDevice) {
9184 VarName = Addr->getName();
9185 Addr = nullptr;
9186 } else {
9187 VarName = getAddrOfDeclareTargetLink(VD).getName();
9188 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
9189 }
9190 VarSize = CGM.getPointerSize();
9191 Linkage = llvm::GlobalValue::WeakAnyLinkage;
9192 break;
9193 }
9194 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9195 VarName, Addr, VarSize, Flags, Linkage);
9196}
9197
9198bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9199 if (isa<FunctionDecl>(GD.getDecl()) ||
9200 isa<OMPDeclareReductionDecl>(GD.getDecl()))
9201 return emitTargetFunctions(GD);
9202
9203 return emitTargetGlobalVariable(GD);
9204}
9205
9206void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9207 for (const VarDecl *VD : DeferredGlobalVariables) {
9208 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9209 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9210 if (!Res)
9211 continue;
9212 if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
9213 CGM.EmitGlobal(VD);
9214 } else {
9215 assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
9216 "Expected to or link clauses.");
9217 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
9218 }
9219 }
9220}
9221
/// Hook for adjusting mapping data for lambda captures in a target-based
/// directive. The base implementation only validates the directive kind.
// NOTE(review): this looks like an extension point for device-specific
// runtime subclasses — confirm which overrides exist before relying on it.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
9227
9228void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9229 const OMPRequiresDecl *D) {
9230 for (const OMPClause *Clause : D->clauselists()) {
9231 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9232 HasRequiresUnifiedSharedMemory = true;
9233 break;
9234 }
9235 }
9236}
9237
/// Check whether \p VD carries an OpenMP 'allocate' attribute and, if so,
/// report via \p AS the language address space its global storage should use.
///
/// \return true when the attribute is present and a predefined allocator was
///         mapped to an address space; false when there is no attribute.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
    // Intentional fallthrough: all predefined allocators below map to the
    // default address space in this base implementation.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    // User-defined allocators require a runtime call and cannot back a
    // statically-placed global.
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  // Not reachable for valid enum values; keeps some compilers quiet.
  return false;
}
9261
9262CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9263 CodeGenModule &CGM)
9264 : CGM(CGM) {
9265 if (CGM.getLangOpts().OpenMPIsDevice) {
9266 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9267 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9268 }
9269}
9270
9271CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9272 if (CGM.getLangOpts().OpenMPIsDevice)
9273 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9274}
9275
9276bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9277 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9278 return true;
9279
9280 StringRef Name = CGM.getMangledName(GD);
9281 const auto *D = cast<FunctionDecl>(GD.getDecl());
9282 // Do not to emit function if it is marked as declare target as it was already
9283 // emitted.
9284 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9285 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9286 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9287 return !F->isDeclaration();
9288 return false;
9289 }
9290 return true;
9291 }
9292
9293 return !AlreadyEmittedTargetFunctions.insert(Name).second;
9294}
9295
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Scope the CodeGenFunction so it is torn down before returning.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the call __tgt_register_requires(flags) in the new function's body.
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
9336
9337llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9338 // If we have offloading in the current module, we need to emit the entries
9339 // now and register the offloading descriptor.
9340 createOffloadEntriesAndInfoMetadata();
9341
9342 // Create and register the offloading binary descriptors. This is the main
9343 // entity that captures all the information about offloading in the current
9344 // compilation unit.
9345 return createOffloadingBinaryDescriptorRegistration();
9346}
9347
9348void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9349 const OMPExecutableDirective &D,
9350 SourceLocation Loc,
9351 llvm::Function *OutlinedFn,
9352 ArrayRef<llvm::Value *> CapturedVars) {
9353 if (!CGF.HaveInsertPoint())
9354 return;
9355
9356 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9357 CodeGenFunction::RunCleanupsScope Scope(CGF);
9358
9359 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9360 llvm::Value *Args[] = {
9361 RTLoc,
9362 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9363 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9364 llvm::SmallVector<llvm::Value *, 16> RealArgs;
9365 RealArgs.append(std::begin(Args), std::end(Args));
9366 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9367
9368 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9369 CGF.EmitRuntimeCall(RTLFn, RealArgs);
9370}
9371
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // A null expression means the clause was absent; 0 is passed in that case.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
9399
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: pass the 'undefined device' sentinel.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    // Info must have been filled in by BeginThenGen before this runs.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
9526
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray are filled by TargetThenGen below and consumed
  // (by reference) inside ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: pass the 'undefined device' sentinel.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All remaining directive kinds cannot reach here (see the assert at the
    // top of the function); list them explicitly so the switch stays covered.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the results into the captured InputInfo/MapTypesArray so that
    // ThenGen can build the runtime call arguments.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // A depend clause turns the construct into a task-based directive.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
9679
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Classification of the parameter; everything is Vector by default.
    ParamKindTy Kind = Vector;
    // Stride/step value used when mangling linear parameters.
    llvm::APSInt StrideOrArg;
    // Alignment value used when mangling ('a' marker); zero when absent.
    llvm::APSInt Alignment;
  };
} // namespace
9690
9691static unsigned evaluateCDTSize(const FunctionDecl *FD,
9692 ArrayRef<ParamAttrTy> ParamAttrs) {
9693 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9694 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9695 // of that clause. The VLEN value must be power of 2.
9696 // In other case the notion of the function`s "characteristic data type" (CDT)
9697 // is used to compute the vector length.
9698 // CDT is defined in the following order:
9699 // a) For non-void function, the CDT is the return type.
9700 // b) If the function has any non-uniform, non-linear parameters, then the
9701 // CDT is the type of the first such parameter.
9702 // c) If the CDT determined by a) or b) above is struct, union, or class
9703 // type which is pass-by-value (except for the type that maps to the
9704 // built-in complex data type), the characteristic data type is int.
9705 // d) If none of the above three cases is applicable, the CDT is int.
9706 // The VLEN is then determined based on the CDT and the size of vector
9707 // register of that ISA for which current vector version is generated. The
9708 // VLEN is computed using the formula below:
9709 // VLEN = sizeof(vector_register) / sizeof(CDT),
9710 // where vector register size specified in section 3.2.1 Registers and the
9711 // Stack Frame of original AMD64 ABI document.
9712 QualType RetType = FD->getReturnType();
9713 if (RetType.isNull())
9714 return 0;
9715 ASTContext &C = FD->getASTContext();
9716 QualType CDT;
9717 if (!RetType.isNull() && !RetType->isVoidType()) {
9718 CDT = RetType;
9719 } else {
9720 unsigned Offset = 0;
9721 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9722 if (ParamAttrs[Offset].Kind == Vector)
9723 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9724 ++Offset;
9725 }
9726 if (CDT.isNull()) {
9727 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9728 if (ParamAttrs[I + Offset].Kind == Vector) {
9729 CDT = FD->getParamDecl(I)->getType();
9730 break;
9731 }
9732 }
9733 }
9734 }
9735 if (CDT.isNull())
9736 CDT = C.IntTy;
9737 CDT = CDT->getCanonicalTypeUnqualified();
9738 if (CDT->isRecordType() || CDT->isUnionType())
9739 CDT = C.IntTy;
9740 return C.getTypeSize(CDT);
9741}
9742
9743static void
9744emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9745 const llvm::APSInt &VLENVal,
9746 ArrayRef<ParamAttrTy> ParamAttrs,
9747 OMPDeclareSimdDeclAttr::BranchStateTy State) {
9748 struct ISADataTy {
9749 char ISA;
9750 unsigned VecRegSize;
9751 };
9752 ISADataTy ISAData[] = {
9753 {
9754 'b', 128
9755 }, // SSE
9756 {
9757 'c', 256
9758 }, // AVX
9759 {
9760 'd', 256
9761 }, // AVX2
9762 {
9763 'e', 512
9764 }, // AVX512
9765 };
9766 llvm::SmallVector<char, 2> Masked;
9767 switch (State) {
9768 case OMPDeclareSimdDeclAttr::BS_Undefined:
9769 Masked.push_back('N');
9770 Masked.push_back('M');
9771 break;
9772 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9773 Masked.push_back('N');
9774 break;
9775 case OMPDeclareSimdDeclAttr::BS_Inbranch:
9776 Masked.push_back('M');
9777 break;
9778 }
9779 for (char Mask : Masked) {
9780 for (const ISADataTy &Data : ISAData) {
9781 SmallString<256> Buffer;
9782 llvm::raw_svector_ostream Out(Buffer);
9783 Out << "_ZGV" << Data.ISA << Mask;
9784 if (!VLENVal) {
9785 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
9786 assert(NumElts && "Non-zero simdlen/cdtsize expected");
9787 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
9788 } else {
9789 Out << VLENVal;
9790 }
9791 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9792 switch (ParamAttr.Kind){
9793 case LinearWithVarStride:
9794 Out << 's' << ParamAttr.StrideOrArg;
9795 break;
9796 case Linear:
9797 Out << 'l';
9798 if (!!ParamAttr.StrideOrArg)
9799 Out << ParamAttr.StrideOrArg;
9800 break;
9801 case Uniform:
9802 Out << 'u';
9803 break;
9804 case Vector:
9805 Out << 'v';
9806 break;
9807 }
9808 if (!!ParamAttr.Alignment)
9809 Out << 'a' << ParamAttr.Alignment;
9810 }
9811 Out << '_' << Fn->getName();
9812 Fn->addFnAttr(Out.str());
9813 }
9814 }
9815}
9816
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9822
9823/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9824///
9825/// TODO: Need to implement the behavior for reference marked with a
9826/// var or no linear modifiers (1.b in the section). For this, we
9827/// need to extend ParamKindTy to support the linear modifiers.
9828static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9829 QT = QT.getCanonicalType();
9830
9831 if (QT->isVoidType())
9832 return false;
9833
9834 if (Kind == ParamKindTy::Uniform)
9835 return false;
9836
9837 if (Kind == ParamKindTy::Linear)
9838 return false;
9839
9840 // TODO: Handle linear references with modifiers
9841
9842 if (Kind == ParamKindTy::LinearWithVarStride)
9843 return false;
9844
9845 return true;
9846}
9847
9848/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9849static bool getAArch64PBV(QualType QT, ASTContext &C) {
9850 QT = QT.getCanonicalType();
9851 unsigned Size = C.getTypeSize(QT);
9852
9853 // Only scalars and complex within 16 bytes wide set PVB to true.
9854 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9855 return false;
9856
9857 if (QT->isFloatingType())
9858 return true;
9859
9860 if (QT->isIntegerType())
9861 return true;
9862
9863 if (QT->isPointerType())
9864 return true;
9865
9866 // TODO: Add support for complex types (section 3.1.2, item 2).
9867
9868 return false;
9869}
9870
9871/// Computes the lane size (LS) of a return type or of an input parameter,
9872/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
9873/// TODO: Add support for references, section 3.2.1, item 1.
9874static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
9875 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
9876 QualType PTy = QT.getCanonicalType()->getPointeeType();
9877 if (getAArch64PBV(PTy, C))
9878 return C.getTypeSize(PTy);
9879 }
9880 if (getAArch64PBV(QT, C))
9881 return C.getTypeSize(QT);
9882
9883 return C.getTypeSize(C.getUIntPtrType());
9884}
9885
9886// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9887// signature of the scalar function, as defined in 3.2.2 of the
9888// AAVFABI.
9889static std::tuple<unsigned, unsigned, bool>
9890getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9891 QualType RetType = FD->getReturnType().getCanonicalType();
9892
9893 ASTContext &C = FD->getASTContext();
9894
9895 bool OutputBecomesInput = false;
9896
9897 llvm::SmallVector<unsigned, 8> Sizes;
9898 if (!RetType->isVoidType()) {
9899 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
9900 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
9901 OutputBecomesInput = true;
9902 }
9903 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9904 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
9905 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
9906 }
9907
9908 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
9909 // The LS of a function parameter / return value can only be a power
9910 // of 2, starting from 8 bits, up to 128.
9911 assert(std::all_of(Sizes.begin(), Sizes.end(),
9912 [](unsigned Size) {
9913 return Size == 8 || Size == 16 || Size == 32 ||
9914 Size == 64 || Size == 128;
9915 }) &&
9916 "Invalid size");
9917
9918 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
9919 *std::max_element(std::begin(Sizes), std::end(Sizes)),
9920 OutputBecomesInput);
9921}
9922
9923/// Mangle the parameter part of the vector function name according to
9924/// their OpenMP classification. The mangling function is defined in
9925/// section 3.5 of the AAVFABI.
9926static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
9927 SmallString<256> Buffer;
9928 llvm::raw_svector_ostream Out(Buffer);
9929 for (const auto &ParamAttr : ParamAttrs) {
9930 switch (ParamAttr.Kind) {
9931 case LinearWithVarStride:
9932 Out << "ls" << ParamAttr.StrideOrArg;
9933 break;
9934 case Linear:
9935 Out << 'l';
9936 // Don't print the step value if it is not present or if it is
9937 // equal to 1.
9938 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
9939 Out << ParamAttr.StrideOrArg;
9940 break;
9941 case Uniform:
9942 Out << 'u';
9943 break;
9944 case Vector:
9945 Out << 'v';
9946 break;
9947 }
9948
9949 if (!!ParamAttr.Alignment)
9950 Out << 'a' << ParamAttr.Alignment;
9951 }
9952
9953 return Out.str();
9954}
9955
9956// Function used to add the attribute. The parameter `VLEN` is
9957// templated to allow the use of "x" when targeting scalable functions
9958// for SVE.
9959template <typename T>
9960static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
9961 char ISA, StringRef ParSeq,
9962 StringRef MangledName, bool OutputBecomesInput,
9963 llvm::Function *Fn) {
9964 SmallString<256> Buffer;
9965 llvm::raw_svector_ostream Out(Buffer);
9966 Out << Prefix << ISA << LMask << VLEN;
9967 if (OutputBecomesInput)
9968 Out << "v";
9969 Out << ParSeq << "_" << MangledName;
9970 Fn->addFnAttr(Out.str());
9971}
9972
9973// Helper function to generate the Advanced SIMD names depending on
9974// the value of the NDS when simdlen is not present.
9975static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
9976 StringRef Prefix, char ISA,
9977 StringRef ParSeq, StringRef MangledName,
9978 bool OutputBecomesInput,
9979 llvm::Function *Fn) {
9980 switch (NDS) {
9981 case 8:
9982 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
9983 OutputBecomesInput, Fn);
9984 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
9985 OutputBecomesInput, Fn);
9986 break;
9987 case 16:
9988 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
9989 OutputBecomesInput, Fn);
9990 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
9991 OutputBecomesInput, Fn);
9992 break;
9993 case 32:
9994 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
9995 OutputBecomesInput, Fn);
9996 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
9997 OutputBecomesInput, Fn);
9998 break;
9999 case 64:
10000 case 128:
10001 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10002 OutputBecomesInput, Fn);
10003 break;
10004 default:
10005 llvm_unreachable("Scalar type is too wide.");
10006 }
10007}
10008
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both unmasked and masked variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10117
10118void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10119 llvm::Function *Fn) {
10120 ASTContext &C = CGM.getContext();
10121 FD = FD->getMostRecentDecl();
10122 // Map params to their positions in function decl.
10123 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10124 if (isa<CXXMethodDecl>(FD))
10125 ParamPositions.try_emplace(FD, 0);
10126 unsigned ParamPos = ParamPositions.size();
10127 for (const ParmVarDecl *P : FD->parameters()) {
10128 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10129 ++ParamPos;
10130 }
10131 while (FD) {
10132 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10133 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10134 // Mark uniform parameters.
10135 for (const Expr *E : Attr->uniforms()) {
10136 E = E->IgnoreParenImpCasts();
10137 unsigned Pos;
10138 if (isa<CXXThisExpr>(E)) {
10139 Pos = ParamPositions[FD];
10140 } else {
10141 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10142 ->getCanonicalDecl();
10143 Pos = ParamPositions[PVD];
10144 }
10145 ParamAttrs[Pos].Kind = Uniform;
10146 }
10147 // Get alignment info.
10148 auto NI = Attr->alignments_begin();
10149 for (const Expr *E : Attr->aligneds()) {
10150 E = E->IgnoreParenImpCasts();
10151 unsigned Pos;
10152 QualType ParmTy;
10153 if (isa<CXXThisExpr>(E)) {
10154 Pos = ParamPositions[FD];
10155 ParmTy = E->getType();
10156 } else {
10157 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10158 ->getCanonicalDecl();
10159 Pos = ParamPositions[PVD];
10160 ParmTy = PVD->getType();
10161 }
10162 ParamAttrs[Pos].Alignment =
10163 (*NI)
10164 ? (*NI)->EvaluateKnownConstInt(C)
10165 : llvm::APSInt::getUnsigned(
10166 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10167 .getQuantity());
10168 ++NI;
10169 }
10170 // Mark linear parameters.
10171 auto SI = Attr->steps_begin();
10172 auto MI = Attr->modifiers_begin();
10173 for (const Expr *E : Attr->linears()) {
10174 E = E->IgnoreParenImpCasts();
10175 unsigned Pos;
10176 if (isa<CXXThisExpr>(E)) {
10177 Pos = ParamPositions[FD];
10178 } else {
10179 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10180 ->getCanonicalDecl();
10181 Pos = ParamPositions[PVD];
10182 }
10183 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10184 ParamAttr.Kind = Linear;
10185 if (*SI) {
10186 Expr::EvalResult Result;
10187 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10188 if (const auto *DRE =
10189 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10190 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10191 ParamAttr.Kind = LinearWithVarStride;
10192 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10193 ParamPositions[StridePVD->getCanonicalDecl()]);
10194 }
10195 }
10196 } else {
10197 ParamAttr.StrideOrArg = Result.Val.getInt();
10198 }
10199 }
10200 ++SI;
10201 ++MI;
10202 }
10203 llvm::APSInt VLENVal;
10204 SourceLocation ExprLoc;
10205 const Expr *VLENExpr = Attr->getSimdlen();
10206 if (VLENExpr) {
10207 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10208 ExprLoc = VLENExpr->getExprLoc();
10209 }
10210 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10211 if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10212 CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10213 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10214 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10215 unsigned VLEN = VLENVal.getExtValue();
10216 StringRef MangledName = Fn->getName();
10217 if (CGM.getTarget().hasFeature("sve"))
10218 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10219 MangledName, 's', 128, Fn, ExprLoc);
10220 if (CGM.getTarget().hasFeature("neon"))
10221 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10222 MangledName, 'n', 128, Fn, ExprLoc);
10223 }
10224 }
10225 FD = FD->getPreviousDecl();
10226 }
10227}
10228
10229namespace {
10230/// Cleanup action for doacross support.
10231class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10232public:
10233 static const int DoacrossFinArgs = 2;
10234
10235private:
10236 llvm::FunctionCallee RTLFn;
10237 llvm::Value *Args[DoacrossFinArgs];
10238
10239public:
10240 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10241 ArrayRef<llvm::Value *> CallArgs)
10242 : RTLFn(RTLFn) {
10243 assert(CallArgs.size() == DoacrossFinArgs);
10244 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10245 }
10246 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10247 if (!CGF.HaveInsertPoint())
10248 return;
10249 CGF.EmitRuntimeCall(RTLFn, Args);
10250 }
10251};
10252} // namespace
10253
/// Emit doacross-loop initialization for \p D: build an on-stack array of
/// kmp_dim descriptors (one per loop in the 'ordered(n)' nest, carrying
/// lower bound, upper bound and stride as kmp_int64), pass it to
/// __kmpc_doacross_init, and push a cleanup emitting __kmpc_doacross_fini
/// on both normal and EH exits from the region.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build the kmp_dim record type and cache it in KmpDimTy so it is
  // created at most once per module.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    // kmp_int64 lo; // lower
    // kmp_int64 up; // upper
    // kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  // Zero-initialize the whole array; the 'lo' fields keep their zero value.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule the matching __kmpc_doacross_fini call for region exit.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10325
10326void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10327 const OMPDependClause *C) {
10328 QualType Int64Ty =
10329 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10330 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10331 QualType ArrayTy = CGM.getContext().getConstantArrayType(
10332 Int64Ty, Size, ArrayType::Normal, 0);
10333 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10334 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10335 const Expr *CounterVal = C->getLoopData(I);
10336 assert(CounterVal);
10337 llvm::Value *CntVal = CGF.EmitScalarConversion(
10338 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10339 CounterVal->getExprLoc());
10340 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10341 /*Volatile=*/false, Int64Ty);
10342 }
10343 llvm::Value *Args[] = {
10344 emitUpdateLocation(CGF, C->getBeginLoc()),
10345 getThreadID(CGF, C->getBeginLoc()),
10346 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10347 llvm::FunctionCallee RTLFn;
10348 if (C->getDependencyKind() == OMPC_DEPEND_source) {
10349 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10350 } else {
10351 assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10352 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10353 }
10354 CGF.EmitRuntimeCall(RTLFn, Args);
10355}
10356
10357void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10358 llvm::FunctionCallee Callee,
10359 ArrayRef<llvm::Value *> Args) const {
10360 assert(Loc.isValid() && "Outlined function call location must be valid.");
10361 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10362
10363 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10364 if (Fn->doesNotThrow()) {
10365 CGF.EmitNounwindRuntimeCall(Fn, Args);
10366 return;
10367 }
10368 }
10369 CGF.EmitRuntimeCall(Callee, Args);
10370}
10371
/// Emit a call to an outlined OpenMP region function. Forwards to emitCall,
/// which attaches an artificial debug location and picks the nounwind call
/// form when possible.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10377
10378void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10379 if (const auto *FD = dyn_cast<FunctionDecl>(D))
10380 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10381 HasEmittedDeclareTargetRegion = true;
10382}
10383
/// Return the address of \p NativeParam inside the current function. The
/// base implementation ignores \p TargetParam and simply returns the local
/// address of the native parameter; derived runtimes may override this
/// (CGOpenMPSIMDRuntime below does).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10389
10390namespace {
10391/// Cleanup action for allocate support.
10392class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10393public:
10394 static const int CleanupArgs = 3;
10395
10396private:
10397 llvm::FunctionCallee RTLFn;
10398 llvm::Value *Args[CleanupArgs];
10399
10400public:
10401 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10402 ArrayRef<llvm::Value *> CallArgs)
10403 : RTLFn(RTLFn) {
10404 assert(CallArgs.size() == CleanupArgs &&
10405 "Size of arguments does not match.");
10406 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10407 }
10408 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10409 if (!CGF.HaveInsertPoint())
10410 return;
10411 CGF.EmitRuntimeCall(RTLFn, Args);
10412 }
10413};
10414} // namespace
10415
/// If \p VD carries an OMPAllocateDeclAttr with a non-default allocator,
/// allocate its storage through __kmpc_alloc, push a cleanup that releases
/// it through __kmpc_free on scope exit, and return the typed address.
/// Returns Address::invalid() when the default allocation should be used.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // Variably-modified type: the size is only known at runtime.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: round up to the declared alignment at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *addr = __kmpc_alloc(gtid, size, allocator);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Schedule __kmpc_free(gtid, addr, allocator) for normal and EH exits.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw allocation to a pointer to the variable's memory type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
10469
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime overrides. In SIMD-only OpenMP mode the entry points
// below must never be reached, so each one aborts via llvm_unreachable; the
// message documents the contract for anyone who does hit one.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10614
// More CGOpenMPSIMDRuntime overrides: unsupported entry points abort via
// llvm_unreachable; the few below that are reachable in SIMD-only mode are
// commented individually.

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Reductions are reachable in SIMD-only mode, but only in their simple
// (no runtime call) form; delegate to the base implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Reachable: no target globals ever require special emission here.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

// Reachable: no offload registration function is needed in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10765